On Mon, Jan 15, 2018 at 07:25:11PM +0000, David Bright wrote: > Author: dab > Date: Mon Jan 15 19:25:11 2018 > New Revision: 328013 > URL: https://svnweb.freebsd.org/changeset/base/328013 > > Log: > Exit fsck_ffs with non-zero status when file system is not repaired. > > When the fsck_ffs program cannot fully repair a file system, it will > output the message PLEASE RERUN FSCK. However, it does not exit with a > non-zero status in this case (contradicting the man page claim that it > "exits with 0 on success, and >0 if an error occurs."). The fsck > rc-script (when running "fsck -y") tests the status from fsck (which > passes along the exit status from fsck_ffs) and issues a "stop_boot" > if the status fails. However, this is not effective since fsck_ffs can > return zero even on (some) errors. Effectively, it is left to a later > step in the boot process when the file systems are mounted to detect > the still-unclean file system and stop the boot. > > This change modifies fsck_ffs so that when it cannot fully repair the > file system and issues the PLEASE RERUN FSCK message it also exits > with a non-zero status. > > While here, the fsck_ffs man page has also been updated to document > the failing exit status codes used by fsck_ffs. Previously, only exit > status 7 was documented. Some of these exit statuses are tested for in > the fsck rc-script, so they are clearly depended upon and deserve > documentation.
etc/rc.d/fsck doesn't know how to interpret the new exit code and now just drops to a single-user shell when it is encountered. This is happening to me semi-regularly when my test systems crash, especially when I test kernel panic handling. :) Is there any reason etc/rc.d/fsck shouldn't automatically retry (up to some configurable number of retries) when the new error code is seen? The patch below seems to do the trick for me: diff --git a/etc/defaults/rc.conf b/etc/defaults/rc.conf index 584e842bba2c..63d2fcc0be8d 100644 --- a/etc/defaults/rc.conf +++ b/etc/defaults/rc.conf @@ -95,6 +95,7 @@ root_rw_mount="YES" # Set to NO to inhibit remounting root read-write. root_hold_delay="30" # Time to wait for root mount hold release. fsck_y_enable="NO" # Set to YES to do fsck -y if the initial preen fails. fsck_y_flags="-T ffs:-R -T ufs:-R" # Additional flags for fsck -y +fsck_retries="3" # Number of times to retry fsck before giving up. background_fsck="YES" # Attempt to run fsck in the background where possible. background_fsck_delay="60" # Time to wait (seconds) before starting the fsck. growfs_enable="NO" # Set to YES to attempt to grow the root filesystem on boot diff --git a/etc/rc.d/fsck b/etc/rc.d/fsck index bd3122a20110..708d92228e3d 100755 --- a/etc/rc.d/fsck +++ b/etc/rc.d/fsck @@ -14,8 +14,82 @@ desc="Run file system checks" start_cmd="fsck_start" stop_cmd=":" +_fsck_run() +{ + local err + + if checkyesno background_fsck; then + fsck -F -p + else + fsck -p + fi + + err=$? + if [ ${err} -eq 3 ]; then + echo "Warning! Some of the devices might not be" \ + "available; retrying" + root_hold_wait + check_startmsgs && echo "Restarting file system checks:" + if checkyesno background_fsck; then + fsck -F -p + else + fsck -p + fi + err=$? + fi + + case ${err} in + 0) + ;; + 2) + stop_boot + ;; + 4) + echo "Rebooting..." + reboot + echo "Reboot failed; help!" 
+ stop_boot + ;; + 8) + if checkyesno fsck_y_enable; then + echo "File system preen failed, trying fsck -y ${fsck_y_flags}" + fsck -y ${fsck_y_flags} + case $? in + 0) + ;; + *) + echo "Automatic file system check failed; help!" + stop_boot + ;; + esac + else + echo "Automatic file system check failed; help!" + stop_boot + fi + ;; + 12) + echo "Boot interrupted." + stop_boot + ;; + 16) + echo "File system check retry requested." + ;; + 130) + stop_boot + ;; + *) + echo "Unknown error ${err}; help!" + stop_boot + ;; + esac + + return $err +} + fsck_start() { + local err tries + if [ "$autoboot" = no ]; then echo "Fast boot: skipping disk checks." elif [ ! -r /etc/fstab ]; then @@ -25,67 +99,13 @@ fsck_start() trap : 3 check_startmsgs && echo "Starting file system checks:" - if checkyesno background_fsck; then - fsck -F -p - else - fsck -p - fi - - err=$? - if [ ${err} -eq 3 ]; then - echo "Warning! Some of the devices might not be" \ - "available; retrying" - root_hold_wait - check_startmsgs && echo "Restarting file system checks:" - if checkyesno background_fsck; then - fsck -F -p - else - fsck -p - fi + tries=$fsck_retries + while [ $tries -gt 0 ]; do + _fsck_run err=$? - fi - - case ${err} in - 0) - ;; - 2) - stop_boot - ;; - 4) - echo "Rebooting..." - reboot - echo "Reboot failed; help!" - stop_boot - ;; - 8) - if checkyesno fsck_y_enable; then - echo "File system preen failed, trying fsck -y ${fsck_y_flags}" - fsck -y ${fsck_y_flags} - case $? in - 0) - ;; - *) - echo "Automatic file system check failed; help!" - stop_boot - ;; - esac - else - echo "Automatic file system check failed; help!" - stop_boot - fi - ;; - 12) - echo "Boot interrupted." - stop_boot - ;; - 130) - stop_boot - ;; - *) - echo "Unknown error ${err}; help!" 
- stop_boot - ;; - esac + [ $err -eq 16 ] || break + tries=$(($tries - 1)) + done fi } diff --git a/share/man/man5/rc.conf.5 b/share/man/man5/rc.conf.5 index c27a2134e6bc..c9a16ca9f65c 100644 --- a/share/man/man5/rc.conf.5 +++ b/share/man/man5/rc.conf.5 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 15, 2018 +.Dd March 9, 2018 .Dt RC.CONF 5 .Os .Sh NAME @@ -2053,6 +2053,11 @@ will be run with the .Fl y flag if the initial preen of the file systems fails. +.It Va fsck_retries +.Pq Vt int +Maximum number of times to re-run +.Xr fsck 8 +if its exit status indicates that a re-run is required. .It Va background_fsck .Pq Vt bool If set to _______________________________________________ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"