Re: bsd.upgrade fails `umount /mnt` with a single partition disk.

Yuichiro NAITO Wed, 10 Nov 2021 17:47:43 -0800

I tried to call 'db_enter' instead of 'panic' in 'vflush_vnode' functionas follows.


```
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -966,6 +966,7 @@ vflush_vnode(struct vnode *vp, void *arg)
                vprint("vflush: busy vnode", vp);
 #endif
        va->busy++;
+       db_enter();
        return (0);
 }
```


Ddb backtrace shows as follows.

```
topped at      db_enter+0x5:   popq    %rbp
ddb> bt

db_enter(10,ffff8000218ef6a0,202,8,ffffffff8120d219,ffff8000218ef6a0) atdb_enter+0x5vflush_vnode(fffffd807f6d5908,ffff8000218ef738,ffff8000218ef738,0,ffffffff81135adc,ffff8000218ef6d0)at vflush_vnode+0x56vfs_mount_foreach_vnode(ffff800000253400,ffffffff81135a86,ffff8000218ef738,ffff800000253400,0,ffff8000218ef738)at vfs_mount_foreach_vnode+0x33vflush(ffff800000253400,0,0,ffff800000253400,ffff8000218b5a40,1) atvflush+0x2dffs_flushfiles(ffff800000253400,0,ffff8000218b5a40,ffff800000253400,ffff800000250600,ffff800000253400)at ffs_flushfiles+0x35ffs_unmount(ffff800000253400,0,ffff8000218b5a40,ffff800000253400,0,ffff800000253400)at ffs_unmount+0x3adounmount_leaf(ffff800000253400,0,ffff8000218b5a40,ffff800000253400,0,10) atdounmount_leaf+0x97dounmount(ffff800000253400,0,ffff8000218b5a40,ffff800000253400,ffff8000218efa10,ffff8000218b5a40)at dounmount+0x8esys_unmount(ffff8000218b5a40,ffff8000218efa10,ffff8000218efa60,ffff8000218b5a40,ffff8000218efac0,ffff8000218b5a40)at sys_unmount+0xb9syscall(ffff8000218efac0,ffff8000218efac0,0,ffff8000218b5a40,0,0) atsyscall+0x1fbXsyscall(6,16,1,16,1,257d1a31a) at Xsyscall+0x128

end of kernel
```

The first argument of 'vflush_vnode' function indicates struct vnode.

```
ddb> show vnode /f 0xfffffd807f6d5908
tag UFS(1) type VREG(1) mount 0xffff800000253400 typedata 0x0
data 0xfffffd807d8b4d30 usecount 1 writecount 0 holdcnt 4 numoutput 0
clean bufs:
 bp 0xfffffd807d3a09e8
  vp 0xfffffd807f6d5908 lblkno 0x1 blkno 0x16405e0 dev 0x400
  proc 0x0 error 0 flags 6008054<ASYNC,BUSY,CALL,READ,BC,DMA>
  bufsize 0xc000 bcount 0xc000 resid 0x0

data 0xffff80000a9de000 saveaddr 0xffff800000259660 dep 0x0 iodone0xffffffff81130b93

  dirty {off 0x0 end 0x0} valid {off 0x0 end 0x0}
 bp 0xfffffd807d3a0b00
  vp 0xfffffd807f6d5908 lblkno 0x2 blkno 0x1640600 dev 0xffffffff
  proc 0x0 error 0 flags 6008014<ASYNC,BUSY,READ,BC,DMA>
  bufsize 0x4000 bcount 0x4000 resid 0x0
  data 0x0 saveaddr 0x0 dep 0x0 iodone 0x0
  dirty {off 0x0 end 0x0} valid {off 0x0 end 0x0}
 bp 0xfffffd807d3a0d30
  vp 0xfffffd807f6d5908 lblkno 0x3 blkno 0x1640620 dev 0xffffffff
  proc 0x0 error 0 flags 6008014<ASYNC,BUSY,READ,BC,DMA>
  bufsize 0x4000 bcount 0x4000 resid 0x0
  data 0x0 saveaddr 0x0 dep 0x0 iodone 0x0
  dirty {off 0x0 end 0x0} valid {off 0x0 end 0x0}
 bp 0xfffffd807d952c10
  vp 0xfffffd807f6d5908 lblkno 0x0 blkno 0x16405c0 dev 0x400
  proc 0x0 error 0 flags 6008010<BUSY,READ,BC,DMA>
  bufsize 0x4000 bcount 0x4000 resid 0x0
  data 0xffff80000a9ce000 saveaddr 0x0 dep 0x0 iodone 0x0
  dirty {off 0x0 end 0x0} valid {off 0x0 end 0x0}
dirty bufs:
```

It seems that this vnode is just reading something.
The mount point is shown below.

```
ddb> show mount /f 0xffff800000253400
flags 201001<RDONLY,LOCAL,SWAPPABLE>
vnodecovered 0xfffffd807d925040 syncer 0x0 data 0xffff800000250600
vfsconf: ops 0xffffffff81476c40 name "ffs" num 1 ref 2 flags 0x201000
statvfs cache: bsize 800 iosize 4000
blocks 8125911 free 7058539 avail 6652244
  files 2099518 ffiles 2071627 favail 2071627
  f_fsidx {0x400, 0x75ebb1ce} owner 0 ctime 0x618b5c0b
  syncwrites 0 asyncwrites = 0
  syncreads 31 asyncreads = 0
  fstype "ffs" mnton "/mnt" mntfrom "/dev/sd0a" mntspec "/dev/sd0a"
locked vnodes:
  0xfffffd807f6d5908
all vnodes:
  0xfffffd807f6d5908
```

Only one vnode is listed.
I tried to check which process is reading the file.

```
ddb> show all procs
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
*79608  169544    244      0  7         0x3                umount
 16385  334942  63557      0  3    0x100003  biowait       dd
 84098  474093  83746      0  3    0x100083  nanoslp       sleep
 83746  149350    244      0  3    0x100089  sigsusp       ksh
 63557  136898    244      0  3    0x100089  sigsusp       ksh
 40679  320580  24653      0  3    0x100083  piperd        sed
 64515  520407  24653      0  3    0x100083  piperd        tee
   244  216269  24653      0  3    0x10008b  sigsusp       ksh
 46235  193472   1543      0  3    0x100083  nanoslp       sleep
  1543  246639  24653      0  3    0x100089  sigsusp       ksh
 24653  421778  18305      0  3    0x10008b  sigsusp       ksh
 79982  360508  81417    115  3    0x100092  kqread        slaacd
 12072  170660  81417    115  3    0x100092  kqread        slaacd
 81417   41530      1      0  3    0x100080  kqread        slaacd
  6202   33156  97299     77  3    0x100092  kqread        dhcpleased
 33897  241523  97299     77  3    0x100092  kqread        dhcpleased
 97299  175348      1      0  3        0x80  kqread        dhcpleased
 43197  217016      1      0  3    0x100080  kqread        resolvd
 18305  363865      1      0  3    0x10008b  sigsusp       sh
 42161  407682      0      0  3     0x14200  bored         smr
 29177  464899      0      0  2     0x14200                zerothread
 39289  156385      0      0  3     0x14200  aiodoned      aiodoned
 21282  513038      0      0  3     0x14200  syncer        update
 13154   20732      0      0  3     0x14200  cleaner       cleaner
 59578   63904      0      0  3     0x14200  reaper        reaper
 76905  342808      0      0  3     0x14200  pgdaemon      pagedaemon
 52686   22631      0      0  3     0x14200  usbtsk        usbtask
 91247  141316      0      0  3     0x14200  usbatsk       usbatsk
  1259  298570      0      0  3  0x40014200  acpi0         acpi0
 75666  385347      0      0  3     0x14200  bored         softnet
 13987  278995      0      0  3     0x14200  bored         systqmp
 95941  144455      0      0  3     0x14200  bored         systq
 54048  119232      0      0  3  0x40014200  bored         softclock
 60901  220570      0      0  3  0x40014200                idle0
     1  467165      0      0  3        0x82  wait          init
     0       0     -1      0  3     0x10200  scheduler     swapper
```

It seems that dd is just reading now.
I looked for where dd is invoked in install.sub script.

```
 2641 # Feed the random pool some entropy before we read from it.
 2642 feed_random() {
 2643         (dmesg; cat $CGI_INFO /*.conf; sysctl; route -n show; df;
 2644                 ifconfig -A; hostname) >/dev/random 2>&1
 2645         if [[ -e /mnt/var/db/host.random ]]; then

2646 dd if=/mnt/var/db/host.random of=/dev/randombs=65536 count=1 \

 2647                         status=none
 2648         fi
 2649 }
```

Dd is invoked in feed_random function.
Feed_random is called in start_cgiinfo function in background.

```

2505 # Fetch the list of mirror servers and installer choices fromprevious runs if2506 # available from ftplist.cgi. Start the ftp process in thebackground, but kill

 2507 # it if it takes longer than 12 seconds.
 2508 start_cgiinfo() {

2509 # If no networks are configured, we do not need thehttplist file.

 2510         ((NIFS < 1)) && return
 2511
 2512         # Ensure proper name resolution in case there's no dns yet.
 2513         add_hostent 129.128.5.191 ftp.openbsd.org
 2514
 2515         # Make sure the ftp subshell gets its own process group.
 2516         set -m
 2517         (
 2518                 unpriv2 ftp -w 15 -Vao - \

2519"$HTTP_PROTO://ftp.openbsd.org/cgi-bin/ftplist.cgi?dbversion=1" \

 2520                         2>/dev/null >$CGI_INFO
 2521

2522 # Remember finish time for adjusting the receivedtimestamp.

 2523                 echo -n $SECONDS >$HTTP_SEC
 2524                 feed_random
 2525         ) & CGIPID=$!
 2526         set +m
 2527
 2528         # If the ftp process takes more than 12 seconds, kill it.
 2529         # XXX We are relying on the pid space not randomly biting us.
 2530         # XXX ftp could terminate early, and the pid could be reused.
 2531         (sleep 12; kill -INT -$CGIPID >/dev/null 2>&1) &
 2532 }
```

For testing, I changed to execute feed_random in foreground as follows.

```
--- a/distrib/miniroot/install.sub
+++ b/distrib/miniroot/install.sub
@@ -2514,7 +2514,6 @@ start_cgiinfo() {

        # Make sure the ftp subshell gets its own process group.
        set -m
-       (
                unpriv2 ftp -w 15 -Vao - \

"$HTTP_PROTO://ftp.openbsd.org/cgi-bin/ftplist.cgi?dbversion=1" \
                        2>/dev/null >$CGI_INFO
@@ -2522,13 +2521,11 @@ start_cgiinfo() {

# Remember finish time for adjusting the receivedtimestamp.

                echo -n $SECONDS >$HTTP_SEC
                feed_random
-       ) & CGIPID=$!
        set +m

        # If the ftp process takes more than 12 seconds, kill it.
        # XXX We are relying on the pid space not randomly biting us.
        # XXX ftp could terminate early, and the pid could be reused.
-       (sleep 12; kill -INT -$CGIPID >/dev/null 2>&1) &
 }
```

Bsd.sysupgrade always succeeds in my bhyve environment.
The reason of umount failure is concurrent execution with dd.

I don't have better idea than executing dd in foreground.
How should we fix this?


On 11/8/21 12:55, Yuichiro NAITO wrote:

Thanks for the comment.
I will see it further more.

On 11/8/21 12:45, Theo de Raadt wrote:

Doesn't this suggest a disk driver failed to perform/complete a write?
Not right at that moment but perhaps in the recent past?

Yuichiro NAITO <naito.yuich...@gmail.com> wrote:

bsd.upgrade fails and shows following messages with a singlepartition disk.


```
Force checking of clean non-root filesystems? [no] no
umount: /mnt: Device busy
Can't umount sd0a!
```

sd0 has following partitions.

```
#                size           offset  fstype [fsize bsize   cpg]
   a:         37752672               64  4.2bsd   2048 16384 12959 # /

b: 4176900 37752750 swap #none

   c:         41943040                0  unused
```

I don't want to say any thing about disk partitioning policy.
I just want to improve bsd.upgrade more stable.

This error happens in a specific environment.
In my cases, OpenBSD on FreeBSD bhyve in Intel Core i7-6700 box caused
this error. No other environments (that I have) reproduce it.

In my investigation, the following patch caused panic in the bhyve
environment.

diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 43a7cbd4ae9..4f4176a77ac 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -966,6 +966,7 @@ vflush_vnode(struct vnode *vp, void *arg)
          vprint("vflush: busy vnode", vp);
  #endif
      va->busy++;
+    panic("vflush_vnode: mark busy");
      return (0);
  }

I think some vnodes are still remaining while `umount /mnt`.
So, the kernel returns EBUSY.
It's right to kernel behavior and user land should handle this case.

Doing `umount /mnt` just after `fsck -p` in disrib/miniroot/install.sub.

```

3264 # Create a skeletal /etc/fstab which is usable for theupgrade process.

3265    munge_fstab
3266
3267    # fsck -p non-root filesystems in /etc/fstab.
3268    check_fs
3269
3270    # Mount filesystems in /etc/fstab.
3272    umount /mnt || { echo "Can't umount $ROOTDEV!"; exit; }
3273    mount_fs
```

Is it necessary that /mnt is mounted while `check_fs` function?
The target disk information is already read from /mnt/etc/fstab in
`munge_fstab` function. `check_fs` uses the merged fstab in /tmp.

Just `umount /mnt` before `check_fs` can get avoid this error.
Is the following patch OK?

```
diff --git a/distrib/miniroot/install.sub b/distrib/miniroot/install.sub
old mode 100644
new mode 100755
index 16d3b6e3420..cf7e15c9754
--- a/distrib/miniroot/install.sub
+++ b/distrib/miniroot/install.sub
@@ -3243,11 +3243,12 @@ do_upgrade() {

# Create a skeletal /etc/fstab which is usable for the upgradeprocess.

      munge_fstab

+    umount /mnt || { echo "Can't umount $ROOTDEV!"; exit; }
+
      # fsck -p non-root filesystems in /etc/fstab.
      check_fs

      # Mount filesystems in /etc/fstab.
-    umount /mnt || { echo "Can't umount $ROOTDEV!"; exit; }
      mount_fs

      rm -f /mnt/bsd.upgrade /mnt/auto_upgrade.conf
```

--
Yuichiro NAITO (naito.yuich...@gmail.com)


--
Yuichiro NAITO (naito.yuich...@gmail.com)

Re: bsd.upgrade fails `umount /mnt` with a single partition disk.

Reply via email to