A previous fix from gnezdo@ pointed out that `u_flags' accesses should be
serialized by `vmobjlock'.  The diff below documents this and fixes the
remaining places where the lock isn't taken yet.  One exception still
remains: the first loop of uvm_vnp_sync().  It cannot be fixed right
now due to possible deadlocks, but that's no reason not to document
& fix the rest of this file.
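
For reference, this is the pattern the diff converts to: take `vmobjlock'
before looking at `u_flags', and sleep with rwsleep_nsec() instead of
tsleep_nsec() so the lock is dropped atomically while waiting.  A minimal
sketch, extracted from the uvn_attach() hunk below:

	rw_enter(uvn->u_obj.vmobjlock, RW_WRITE);
	while (uvn->u_flags & UVM_VNODE_BLOCKED) {
		uvn->u_flags |= UVM_VNODE_WANTED;
		rwsleep_nsec(uvn, uvn->u_obj.vmobjlock, PVM, "uvn_attach",
		    INFSLP);
	}
	/* `u_flags' can now be read and modified safely. */
	...
	rw_exit(uvn->u_obj.vmobjlock);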

This has been tested on amd64 and arm64.

Comments?  Oks?

Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
retrieving revision 1.128
diff -u -p -r1.128 uvm_vnode.c
--- uvm/uvm_vnode.c     10 Sep 2022 16:14:36 -0000      1.128
+++ uvm/uvm_vnode.c     10 Sep 2022 18:23:57 -0000
@@ -68,11 +68,8 @@
  * we keep a simpleq of vnodes that are currently being sync'd.
  */
 
-LIST_HEAD(uvn_list_struct, uvm_vnode);
-struct uvn_list_struct uvn_wlist;      /* writeable uvns */
-
-SIMPLEQ_HEAD(uvn_sq_struct, uvm_vnode);
-struct uvn_sq_struct uvn_sync_q;               /* sync'ing uvns */
+LIST_HEAD(, uvm_vnode)         uvn_wlist;      /* [K] writeable uvns */
+SIMPLEQ_HEAD(, uvm_vnode)      uvn_sync_q;     /* [S] sync'ing uvns */
 struct rwlock uvn_sync_lock;                   /* locks sync operation */
 
 extern int rebooting;
@@ -144,41 +141,40 @@ uvn_attach(struct vnode *vp, vm_prot_t a
        struct partinfo pi;
        u_quad_t used_vnode_size = 0;
 
-       /* first get a lock on the uvn. */
-       while (uvn->u_flags & UVM_VNODE_BLOCKED) {
-               uvn->u_flags |= UVM_VNODE_WANTED;
-               tsleep_nsec(uvn, PVM, "uvn_attach", INFSLP);
-       }
-
        /* if we're mapping a BLK device, make sure it is a disk. */
        if (vp->v_type == VBLK && bdevsw[major(vp->v_rdev)].d_type != D_DISK) {
                return NULL;
        }
 
+       /* first get a lock on the uvn. */
+       rw_enter(uvn->u_obj.vmobjlock, RW_WRITE);
+       while (uvn->u_flags & UVM_VNODE_BLOCKED) {
+               uvn->u_flags |= UVM_VNODE_WANTED;
+               rwsleep_nsec(uvn, uvn->u_obj.vmobjlock, PVM, "uvn_attach",
+                   INFSLP);
+       }
+
        /*
         * now uvn must not be in a blocked state.
         * first check to see if it is already active, in which case
         * we can bump the reference count, check to see if we need to
         * add it to the writeable list, and then return.
         */
-       rw_enter(uvn->u_obj.vmobjlock, RW_WRITE);
        if (uvn->u_flags & UVM_VNODE_VALID) {   /* already active? */
                KASSERT(uvn->u_obj.uo_refs > 0);
 
                uvn->u_obj.uo_refs++;           /* bump uvn ref! */
-               rw_exit(uvn->u_obj.vmobjlock);
 
                /* check for new writeable uvn */
                if ((accessprot & PROT_WRITE) != 0 &&
                    (uvn->u_flags & UVM_VNODE_WRITEABLE) == 0) {
-                       LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
-                       /* we are now on wlist! */
                        uvn->u_flags |= UVM_VNODE_WRITEABLE;
+                       LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
                }
+               rw_exit(uvn->u_obj.vmobjlock);
 
                return (&uvn->u_obj);
        }
-       rw_exit(uvn->u_obj.vmobjlock);
 
        /*
         * need to call VOP_GETATTR() to get the attributes, but that could
@@ -189,6 +185,7 @@ uvn_attach(struct vnode *vp, vm_prot_t a
         * it.
         */
        uvn->u_flags = UVM_VNODE_ALOCK;
+       rw_exit(uvn->u_obj.vmobjlock);
 
        if (vp->v_type == VBLK) {
                /*
@@ -213,9 +210,11 @@ uvn_attach(struct vnode *vp, vm_prot_t a
        }
 
        if (result != 0) {
+               rw_enter(uvn->u_obj.vmobjlock, RW_WRITE);
                if (uvn->u_flags & UVM_VNODE_WANTED)
                        wakeup(uvn);
                uvn->u_flags = 0;
+               rw_exit(uvn->u_obj.vmobjlock);
                return NULL;
        }
 
@@ -236,18 +235,19 @@ uvn_attach(struct vnode *vp, vm_prot_t a
        uvn->u_nio = 0;
        uvn->u_size = used_vnode_size;
 
-       /* if write access, we need to add it to the wlist */
-       if (accessprot & PROT_WRITE) {
-               LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
-               uvn->u_flags |= UVM_VNODE_WRITEABLE;    /* we are on wlist! */
-       }
-
        /*
         * add a reference to the vnode.   this reference will stay as long
         * as there is a valid mapping of the vnode.   dropped when the
         * reference count goes to zero.
         */
        vref(vp);
+
+       /* if write access, we need to add it to the wlist */
+       if (accessprot & PROT_WRITE) {
+               uvn->u_flags |= UVM_VNODE_WRITEABLE;
+               LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
+       }
+
        if (oldflags & UVM_VNODE_WANTED)
                wakeup(uvn);
 
@@ -273,6 +273,7 @@ uvn_reference(struct uvm_object *uobj)
        struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
 #endif
 
+       rw_enter(uobj->vmobjlock, RW_WRITE);
 #ifdef DEBUG
        if ((uvn->u_flags & UVM_VNODE_VALID) == 0) {
                printf("uvn_reference: ref=%d, flags=0x%x\n",
@@ -280,7 +281,6 @@ uvn_reference(struct uvm_object *uobj)
                panic("uvn_reference: invalid state");
        }
 #endif
-       rw_enter(uobj->vmobjlock, RW_WRITE);
        uobj->uo_refs++;
        rw_exit(uobj->vmobjlock);
 }
@@ -858,6 +858,8 @@ uvn_cluster(struct uvm_object *uobj, vof
        struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
        *loffset = offset;
 
+       KASSERT(rw_write_held(uobj->vmobjlock));
+
        if (*loffset >= uvn->u_size)
                panic("uvn_cluster: offset out of range");
 
@@ -867,8 +869,6 @@ uvn_cluster(struct uvm_object *uobj, vof
        *hoffset = *loffset + MAXBSIZE;
        if (*hoffset > round_page(uvn->u_size)) /* past end? */
                *hoffset = round_page(uvn->u_size);
-
-       return;
 }
 
 /*
@@ -1132,8 +1132,9 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t 
        off_t file_offset;
        int waitf, result, mapinflags;
        size_t got, wanted;
-       int netunlocked = 0;
+       int vnlocked, netunlocked = 0;
        int lkflags = (flags & PGO_NOWAIT) ? LK_NOWAIT : 0;
+       voff_t uvnsize;
 
        KASSERT(rw_write_held(uobj->vmobjlock));
 
@@ -1172,6 +1173,8 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t 
         * (this time with sleep ok).
         */
        uvn->u_nio++;                   /* we have an I/O in progress! */
+       vnlocked = (uvn->u_flags & UVM_VNODE_VNISLOCKED);
+       uvnsize = uvn->u_size;
        rw_exit(uobj->vmobjlock);
        if (kva == 0)
                kva = uvm_pagermapin(pps, npages,
@@ -1185,8 +1188,8 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t 
        /* fill out uio/iov */
        iov.iov_base = (caddr_t) kva;
        wanted = (size_t)npages << PAGE_SHIFT;
-       if (file_offset + wanted > uvn->u_size)
-               wanted = uvn->u_size - file_offset;     /* XXX: needed? */
+       if (file_offset + wanted > uvnsize)
+               wanted = uvnsize - file_offset; /* XXX: needed? */
        iov.iov_len = wanted;
        uio.uio_iov = &iov;
        uio.uio_iovcnt = 1;
@@ -1217,7 +1220,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t 
         */
        result = 0;
        KERNEL_LOCK();
-       if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
+       if (!vnlocked)
                result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL | lkflags);
        if (result == 0) {
                /* NOTE: vnode now locked! */
@@ -1228,7 +1231,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t 
                            (flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0,
                            curproc->p_ucred);
 
-               if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
+               if (!vnlocked)
                        VOP_UNLOCK(vn);
 
        }
Index: uvm/uvm_vnode.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.h,v
retrieving revision 1.19
diff -u -p -r1.19 uvm_vnode.h
--- uvm/uvm_vnode.h     10 Sep 2022 16:14:36 -0000      1.19
+++ uvm/uvm_vnode.h     10 Sep 2022 17:55:33 -0000
@@ -44,24 +44,27 @@
  * vnode handle into the VM system.
  */
 
+struct vnode;
+
 /*
  * the uvm_vnode structure.
+ *
+ *  Locks used to protect struct members in this file:
+ *     I       immutable after creation
+ *     K       kernel lock
+ *     S       uvn_sync_lock
+ *     v       u_obj's vmobjlock
  */
-
-struct vnode;
-
 struct uvm_vnode {
        struct uvm_object u_obj;        /* the actual VM object */
-       struct vnode *u_vnode;          /* pointer back to vnode */
-       int u_flags;                    /* flags */
-       int u_nio;                      /* number of running I/O requests */
-       voff_t u_size;                  /* size of object */
+       struct vnode *u_vnode;          /* [I] pointer back to vnode */
+       int u_flags;                    /* [v] flags */
+       int u_nio;                      /* [v] number of running I/O requests */
+       voff_t u_size;                  /* [v] size of object */
 
-       /* the following entry is locked by uvn_wl_lock */
-       LIST_ENTRY(uvm_vnode) u_wlist;  /* list of writeable vnode objects */
+       LIST_ENTRY(uvm_vnode) u_wlist;  /* [K] list of writeable vnode objs */
 
-       /* the following entry is locked by uvn_sync_lock */
-       SIMPLEQ_ENTRY(uvm_vnode) u_syncq; /* vnode objects due for a "sync" */
+       SIMPLEQ_ENTRY(uvm_vnode) u_syncq; /* [S] vnode objs due for a "sync" */
 };
 
 /*
