On 17/12/22(Sat) 14:15, David Hill wrote:
> 
> 
> On 10/28/22 03:46, Renato Aguiar wrote:
> > Use of bbolt Go library causes 7.2 to freeze. I suspect it is triggering
> > some sort of deadlock in mmap because threads get stuck at vmmaplk.
> > 
> > I managed to reproduce it consistently in a laptop with 4 cores (i5-1135G7)
> > using one unit test from bbolt:
> > 
> >    $ doas pkg_add git go
> >    $ git clone https://github.com/etcd-io/bbolt.git
> >    $ cd bbolt
> >    $ git checkout v1.3.6
> >    $ go test -v -run TestSimulate_10000op_10p
> > 
> > The test never ends and this is the 'top' report:
> > 
> >    PID      TID PRI NICE  SIZE   RES STATE     WAIT      TIME    CPU COMMAND
> > 32181   438138 -18    0   57M   13M idle      uvn_fls   0:00  0.00% bbolt.test
> > 32181   331169  10    0   57M   13M sleep/1   nanoslp   0:00  0.00% bbolt.test
> > 32181   497390  10    0   57M   13M idle      vmmaplk   0:00  0.00% bbolt.test
> > 32181   380477  14    0   57M   13M idle      vmmaplk   0:00  0.00% bbolt.test
> > 32181   336950  14    0   57M   13M idle      vmmaplk   0:00  0.00% bbolt.test
> > 32181   491043  14    0   57M   13M idle      vmmaplk   0:00  0.00% bbolt.test
> > 32181   347071   2    0   57M   13M idle      kqread    0:00  0.00% bbolt.test
> > 
> > After this, most commands just hang. For example, running a 'ps | grep foo'
> > in another shell would do it.
> > 
> 
> I can reproduce this on MP, but not SP.  Here is /trace from ddb after using
> the ddb.trigger sysctl.  Is there any other information I could pull from
> DDB that may help?

Thanks for the useful report David! 

The issue seems to be a deadlock between the `vmmaplk' and a particular
`vmobjlock'.  uvm_map_clean() calls uvn_flush() which sleeps with the
`vmmaplk' held. 

I'll think a bit about this and try to come up with a fix ASAP.

> Stopped at    db_enter+0x10:  popq    %rbp
> ddb{3}>    PID     TID   PPID    UID  S       FLAGS  WAIT COMMAND
> *50158  300210  75987      0  7         0x3                sysctl
>  19266  326894  80979   1000  3         0x3  vmmaplk       bbolt.test
>  19266  173202  80979   1000  3   0x4000083  nanoslp       bbolt.test
>  19266   53881  80979   1000  3   0x4000083  kqread        bbolt.test
>  19266  305124  80979   1000  3   0x4000003  uvn_flsh      bbolt.test
>  19266  409572  80979   1000  3   0x4000003  vmmaplk       bbolt.test
>  19266  471071  80979   1000  3   0x4000003  vmmaplk       bbolt.test
>  19266   75742  80979   1000  3   0x4000003  vmmaplk       bbolt.test
>  80979  246480  44618   1000  3        0x83  thrsleep      go
>  80979  127832  44618   1000  3   0x4000083  thrsleep      go
>  80979  259946  44618   1000  3   0x4000083  thrsleep      go
>  80979  301163  44618   1000  3   0x4000083  thrsleep      go
>  80979  179798  44618   1000  3   0x4000083  wait          go
>  80979  488795  44618   1000  3   0x4000083  thrsleep      go
>  80979   34313  44618   1000  3   0x4000083  thrsleep      go
>  80979  265681  44618   1000  3   0x4000083  thrsleep      go
>  80979  497706  44618   1000  3   0x4000083  thrsleep      go
>  80979  427226  44618   1000  3   0x4000083  kqread        go
>  94416  390071      1      0  3    0x100083  ttyin         getty
>   8978  261384      1      0  3    0x100083  ttyin         getty
>   9412  162712      1      0  3    0x100083  ttyin         getty
>  44618  141216      1   1000  3    0x10008b  sigsusp       ksh
>  75987  285267      1      0  3    0x10008b  sigsusp       ksh
>  55798  180352      1      0  3    0x100098  kqread        cron
>  97399    2603      1      0  3        0x80  kqread        apmd
>   2179  523954      1     99  3   0x1100090  kqread        sndiod
>  26099  499871      1    110  3    0x100090  kqread        sndiod
>  12661   11825  84402     95  3   0x1100092  kqread        smtpd
>  97311   87889  84402    103  3   0x1100092  kqread        smtpd
>  18428  154020  84402     95  3   0x1100092  kqread        smtpd
> 
>  ddb{3}> trace /t 0t326894
> sleep_finish(ffff8000344b0bb0,1) at sleep_finish+0xfe
> rw_enter(fffffd821cbcb220,2) at rw_enter+0x232
> uvmfault_relock(ffff8000344b0e10) at uvmfault_relock+0x6f
> uvm_fault_lower(ffff8000344b0e10,ffff8000344b0e48,ffff8000344b0d90,0) at
> uvm_fault_lower+0x38a
> uvm_fault(fffffd821cbcb188,2e4182000,0,1) at uvm_fault+0x1b3
> upageflttrap(ffff8000344b0f70,2e4182008) at upageflttrap+0x62
> usertrap(ffff8000344b0f70) at usertrap+0x129
> recall_trap() at recall_trap+0x8
> end of kernel
> end trace frame: 0xc00005d8d8, count: -8
> 
> ddb{3}> trace /t 0t173202
> sleep_finish(ffff8000344b9380,1) at sleep_finish+0xfe
> tsleep(ffffffff823bbde8,120,ffffffff81f2bf4b,2) at tsleep+0xb2
> sys_nanosleep(ffff800034349cf0,ffff8000344b9490,ffff8000344b94f0) at
> sys_nanosleep+0x12d
> syscall(ffff8000344b9560) at syscall+0x384
> Xsyscall() at Xsyscall+0x128
> end of kernel
> end trace frame: 0x27a3e8610, count: -5
> 
> ddb{3}> trace /t 0t53881
> sleep_finish(ffff8000344bf3c0,1) at sleep_finish+0xfe
> msleep(fffffd821c7a8878,fffffd821c7a8878,318,ffffffff81fb1188,ea61) at
> msleep+0xc7
> kqueue_sleep(fffffd821c7a8878,ffff8000344bf788) at kqueue_sleep+0xbe
> kqueue_scan(ffff8000344bf688,8,ffff8000344bf580,ffff8000344bf788,ffff800034348a90,ffff8000344bf7dc)
> at kqueue_scan+0x108
> sys_kevent(ffff800034348a90,ffff8000344bf840,ffff8000344bf8a0) at
> sys_kevent+0x371
> syscall(ffff8000344bf910) at syscall+0x384
> Xsyscall() at Xsyscall+0x128
> end of kernel
> end trace frame: 0x229078460, count: -7
> 
> ddb{3}> trace /t 0t305124
> sleep_finish(ffff8000344c54a0,1) at sleep_finish+0xfe
> rwsleep(fffffd810c1ab1c0,fffffd8241b33188,204,ffffffff81fd7cd2,0) at
> rwsleep+0x9b
> uvn_flush(fffffd821276b4c8,0,400000,b) at uvn_flush+0x15a
> uvm_map_clean(fffffd821cbcb188,2e4005000,2e4405000,b) at uvm_map_clean+0x21d
> syscall(ffff8000344c57e0) at syscall+0x35f
> Xsyscall() at Xsyscall+0x128
> end of kernel
> end trace frame: 0x2783a8d40, count: -6
> 
> ddb{3}> trace /t 0t409572
> sleep_finish(ffff8000344d1fd0,1) at sleep_finish+0xfe
> rw_enter(fffffd821cbcb220,21) at rw_enter+0x232
> vm_map_lock_ln(fffffd821cbcb188,ffffffff81f22ed0,6e7) at vm_map_lock_ln+0x92
> uvmfault_lookup(ffff8000344d22b0,1) at uvmfault_lookup+0x73
> uvm_fault_check(ffff8000344d22b0,ffff8000344d22e8,ffff8000344d2310) at
> uvm_fault_check+0x27e
> uvm_fault(fffffd821cbcb188,c005ad0000,0,2) at uvm_fault+0xfb
> upageflttrap(ffff8000344d2410,c005ad0000) at upageflttrap+0x62
> usertrap(ffff8000344d2410) at usertrap+0x129
> recall_trap() at recall_trap+0x8
> end of kernel
> end trace frame: 0xc0001baab0, count: -9
> 
> ddb{3}> trace /t 0t471071
> sleep_finish(ffff8000344d8810,1) at sleep_finish+0xfe
> rw_enter(fffffd821cbcb220,2) at rw_enter+0x232
> uvm_map_inentry_fix(ffff8000344b2008,ffff8000344b2068,c0001cdd88,ffffffff81e4e040,72)
> at uvm_map_inentry_fix+0x60
> uvm_map_inentry(ffff8000344b2008,ffff8000344b2068,c0001cdd88,ffffffff81f737d9,ffffffff81e4e040,72)
> at uvm_map_inentry+0x68
> usertrap(ffff8000344d89f0) at usertrap+0x11a
> recall_trap() at recall_trap+0x8
> end of kernel
> end trace frame: 0xc0001cddc0, count: -6
> 
> ddb{3}> trace /t 0t75742
> sleep_finish(ffff8000344de580,1) at sleep_finish+0xfe
> rw_enter(fffffd821cbcb220,2) at rw_enter+0x232
> uvmfault_lookup(ffff8000344de800,0) at uvmfault_lookup+0x8a
> uvm_fault_check(ffff8000344de800,ffff8000344de838,ffff8000344de860) at
> uvm_fault_check+0x32
> uvm_fault(fffffd821cbcb188,2e432a000,0,1) at uvm_fault+0xfb
> upageflttrap(ffff8000344de960,2e432a008) at upageflttrap+0x62
> usertrap(ffff8000344de960) at usertrap+0x129
> recall_trap() at recall_trap+0x8
> end of kernel
> end trace frame: 0xc0001bea18, count: -8
> 

Reply via email to