On 29/07/22(Fri) 14:22, Theo Buehler wrote:
> On Mon, Jul 11, 2022 at 01:05:19PM +0200, Martin Pieuchot wrote:
> > On 11/07/22(Mon) 07:50, Theo Buehler wrote:
> > > On Fri, Jun 03, 2022 at 03:02:36PM +0200, Theo Buehler wrote:
> > > > > Please do note that this change can introduce/expose other issues.
> > > > 
> > > > It seems that this diff causes occasional hangs when building snapshots
> > > > on my mac M1 mini. This happened twice in 10 builds, both times in
> > > > xenocara. Unfortunately, both times the machine became entirely
> > > > unresponsive and as I don't have serial console, that's all the info I
> > > > have...
> > > > 
> > > > This machine has been very reliable and built >50 snaps without any hang
> > > > over the last 2.5 months. I'm now trying snap builds in a loop without
> > > > the diff to make sure the machine doesn't hang due to another recent
> > > > kernel change.
> > > > 
> > > 
> > > A little bit of info on this. The first three lines were a bit garbled on
> > > the screen:
> > > 
> > > panic: kernel diagnostic assertion "uvn->_oppa jai c:              ke r  
> > > el   d iag no   tic a  s   rt n "   map   ==    UL L  | | rw wr           
> > >           k
> > > ite held(amap->amap_lock)" failed: file "/ss/uvm/uvm_fault.c", line 846.
> > > ernel diagnostic assertion "!_kernel_lock_held
> > > Stopped at panic+0160:  cmp w21, #0x0  ailed: file "/sys/kern/
> > >     TID    PID     UID     PRFLAGS     PFLAGS   CPU  COMMAND
> > >  411910  44540      21    0x100001          0     3  make
> > > *436444  84241      21    0x100003          0     6  sh
> > >  227952  53498      21    0x100003          0     5  sh
> > >  258925  15765      21    0x101005          0     0  make
> > >  128459   9649      21    0x100003          0     1  tradcpp
> > >  287213  64216      21    0x100003        0x8     7  make
> > >  173587   4617    1000    0x100000          0     2  tmux
> > >  126511  69919       0     0x14000      0x200     4  softnet
> > > db_enter() at panic+0x15c
> > > panic() at __assert+0x24
> > > uvm_fault() at uvm_fault_upper_lookup+0x258
> > > uvm_fault_upper() at uvm_fault+0xec
> > > uvm_fault() at udata_abort+0x128
> > > udata_abort() at do_el0_sync+0xdc
> > > do_el0_sync() at handle_el0_sync+0x74
> > > https://www.openbsd.org/ddb.html describes the minimum info required in 
> > > bug
> > > reports.  Insufficient info makes it difficult to find and fix bugs.
> > > ddb{6}> show panic
> > > *cpu0: kernel diagnostic assertion "uvn->u_obj.uo_refs == 0" failed: file 
> > >  "/sys/kern/uvn_vnode.c", line 231.
> > >  cpu6: kernel diagnostic assertion "amap == NULL || 
> > > rw_write_held(amap->am_lock)" failed: file  "/sys/uvm/uvm_fault", line 
> > > 846.
> > >  cpu3: kernel diagnostic assertion "!_kernel_lock_held()" failed: file 
> > > "/sys/kern/kern_fork.c", line 678
> > > ddb{6}> mach ddbcpu 0
> > > 
> > > After pressing enter here, the machine locked up completely.
> > 
> > It's hard for me to tell what's going on.  I believe the interesting
> > trace is the one from cpu0 that we don't have.  Can you easily reproduce
> > this?  I'm trying on amd64 without luck.  I'd glad if you could gather
> > more infos.
> 
> Sorry for the delay. I was only at home intermittently. I hit this three
> times:
> 
> panic: kernel diagnostic assertion "uvn->u_obj.uo_refs == 0" failed: file 
> "/sys/uvm/uvm_vnode.c", line 231

Yesterday gnezdo@ fixed a race in uvn_attach() that led to the same
assert.  Here's a rebased diff for the bug discussed in this thread,
could you try again and let us know?  Thanks!

Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
retrieving revision 1.127
diff -u -p -r1.127 uvm_vnode.c
--- uvm/uvm_vnode.c     31 Aug 2022 09:07:35 -0000      1.127
+++ uvm/uvm_vnode.c     1 Sep 2022 12:54:27 -0000
@@ -163,11 +163,8 @@ uvn_attach(struct vnode *vp, vm_prot_t a
         */
        rw_enter(uvn->u_obj.vmobjlock, RW_WRITE);
        if (uvn->u_flags & UVM_VNODE_VALID) {   /* already active? */
+               KASSERT(uvn->u_obj.uo_refs > 0);
 
-               /* regain vref if we were persisting */
-               if (uvn->u_obj.uo_refs == 0) {
-                       vref(vp);
-               }
                uvn->u_obj.uo_refs++;           /* bump uvn ref! */
                rw_exit(uvn->u_obj.vmobjlock);
 
@@ -235,7 +232,7 @@ uvn_attach(struct vnode *vp, vm_prot_t a
        KASSERT(uvn->u_obj.uo_refs == 0);
        uvn->u_obj.uo_refs++;
        oldflags = uvn->u_flags;
-       uvn->u_flags = UVM_VNODE_VALID|UVM_VNODE_CANPERSIST;
+       uvn->u_flags = UVM_VNODE_VALID;
        uvn->u_nio = 0;
        uvn->u_size = used_vnode_size;
 
@@ -248,7 +245,7 @@ uvn_attach(struct vnode *vp, vm_prot_t a
        /*
         * add a reference to the vnode.   this reference will stay as long
         * as there is a valid mapping of the vnode.   dropped when the
-        * reference count goes to zero [and we either free or persist].
+        * reference count goes to zero.
         */
        vref(vp);
        if (oldflags & UVM_VNODE_WANTED)
@@ -321,16 +318,6 @@ uvn_detach(struct uvm_object *uobj)
         */
        vp->v_flag &= ~VTEXT;
 
-       /*
-        * we just dropped the last reference to the uvn.   see if we can
-        * let it "stick around".
-        */
-       if (uvn->u_flags & UVM_VNODE_CANPERSIST) {
-               /* won't block */
-               uvn_flush(uobj, 0, 0, PGO_DEACTIVATE|PGO_ALLPAGES);
-               goto out;
-       }
-
        /* its a goner! */
        uvn->u_flags |= UVM_VNODE_DYING;
 
@@ -380,7 +367,6 @@ uvn_detach(struct uvm_object *uobj)
        /* wake up any sleepers */
        if (oldflags & UVM_VNODE_WANTED)
                wakeup(uvn);
-out:
        rw_exit(uobj->vmobjlock);
 
        /* drop our reference to the vnode. */
@@ -496,8 +482,8 @@ uvm_vnp_terminate(struct vnode *vp)
        }
 
        /*
-        * done.   now we free the uvn if its reference count is zero
-        * (true if we are zapping a persisting uvn).   however, if we are
+        * done.   now we free the uvn if its reference count is zero.
+        * however, if we are
         * terminating a uvn with active mappings we let it live ... future
         * calls down to the vnode layer will fail.
         */
@@ -505,14 +491,14 @@ uvm_vnp_terminate(struct vnode *vp)
        if (uvn->u_obj.uo_refs) {
                /*
                 * uvn must live on it is dead-vnode state until all references
-                * are gone.   restore flags.    clear CANPERSIST state.
+                * are gone.   restore flags.
                 */
                uvn->u_flags &= ~(UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED|
-                     UVM_VNODE_WANTED|UVM_VNODE_CANPERSIST);
+                     UVM_VNODE_WANTED);
        } else {
                /*
                 * free the uvn now.   note that the vref reference is already
-                * gone [it is dropped when we enter the persist state].
+                * gone.
                 */
                if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED)
                        panic("uvm_vnp_terminate: io sync wanted bit set");
@@ -1349,46 +1335,14 @@ uvm_vnp_uncache(struct vnode *vp)
        }
 
        /*
-        * we have a valid, non-blocked uvn.   clear persist flag.
+        * we have a valid, non-blocked uvn.
         * if uvn is currently active we can return now.
         */
-       uvn->u_flags &= ~UVM_VNODE_CANPERSIST;
        if (uvn->u_obj.uo_refs) {
                rw_exit(uobj->vmobjlock);
                return FALSE;
        }
 
-       /*
-        * uvn is currently persisting!   we have to gain a reference to
-        * it so that we can call uvn_detach to kill the uvn.
-        */
-       vref(vp);                       /* seems ok, even with VOP_LOCK */
-       uvn->u_obj.uo_refs++;           /* value is now 1 */
-       rw_exit(uobj->vmobjlock);
-
-#ifdef VFSLCKDEBUG
-       /*
-        * carry over sanity check from old vnode pager: the vnode should
-        * be VOP_LOCK'd, and we confirm it here.
-        */
-       if ((vp->v_flag & VLOCKSWORK) && !VOP_ISLOCKED(vp))
-               panic("uvm_vnp_uncache: vnode not locked!");
-#endif
-
-       /*
-        * now drop our reference to the vnode.   if we have the sole
-        * reference to the vnode then this will cause it to die [as we
-        * just cleared the persist flag].   we have to unlock the vnode
-        * while we are doing this as it may trigger I/O.
-        *
-        * XXX: it might be possible for uvn to get reclaimed while we are
-        * unlocked causing us to return TRUE when we should not.   we ignore
-        * this as a false-positive return value doesn't hurt us.
-        */
-       VOP_UNLOCK(vp);
-       uvn_detach(&uvn->u_obj);
-       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-
        return TRUE;
 }
 
@@ -1483,11 +1437,9 @@ uvm_vnp_sync(struct mount *mp)
                        continue;
 
                /*
-                * gain reference.   watch out for persisting uvns (need to
-                * regain vnode REF).
+                * gain reference.
                 */
-               if (uvn->u_obj.uo_refs == 0)
-                       vref(vp);
+               KASSERT(uvn->u_obj.uo_refs > 0);
                uvn->u_obj.uo_refs++;
 
                SIMPLEQ_INSERT_HEAD(&uvn_sync_q, uvn, u_syncq);

Reply via email to