commit:     90138e02119bae09efe78f6ece02c039ad132137
Author:     Thomas Deutschmann <whissi <AT> gentoo <DOT> org>
AuthorDate: Sun Mar 14 18:24:39 2021 +0000
Commit:     Thomas Deutschmann <whissi <AT> gentoo <DOT> org>
CommitDate: Sun Mar 14 19:39:48 2021 +0000
URL:        https://gitweb.gentoo.org/proj/genkernel.git/commit/?id=90138e02

linuxrc: Refactor switch_root error handling

The previous approach (commit 93bf318e5114233f3cacc4575ab2e58d60e785c7)
never worked: exec replaces initramfs' init (PID 1) with the specified
command, so no subsequent line is ever reached. If that command fails,
init has effectively exited, which triggers a kernel panic:

  !! A fatal error has occured since /sbin/openrc-init did not
  !! boot correctly. Trying to open a shell ...
  + exec /bin/bash
  /init: exec: line 1366: /bin/bash: not found
  [   55.060649] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00007f00

The new error handling keeps init running in a loop, which allows the
user to fix each detected problem until we are confident that the
switch_root call has a chance to succeed.

If the user cannot fix the problem (for example, because
gk.userinteraction.disabled is set), we call the newly added
gk.emergency action (reboot, poweroff or halt).

Signed-off-by: Thomas Deutschmann <whissi <AT> gentoo.org>

 defaults/initrd.defaults |  1 +
 defaults/linuxrc         | 73 +++++++++++++++++++++++++++++++++---------------
 doc/genkernel.8.txt      |  8 +++++-
 3 files changed, 59 insertions(+), 23 deletions(-)

diff --git a/defaults/initrd.defaults b/defaults/initrd.defaults
index b4e1e1d..18e1382 100644
--- a/defaults/initrd.defaults
+++ b/defaults/initrd.defaults
@@ -71,6 +71,7 @@ VERIFY=0
 IP='dhcp'
 GK_BOOTFONT_DISABLED=0
 GK_DEBUGMODE_STATEFILE="/tmp/debug.enabled"
+GK_EMERGENCY_ACTION="halt -f"
 GK_HW_LOAD_ALL_MODULES=0
 GK_HW_USE_MODULES_LOAD=0
 GK_INIT_LOG='/run/initramfs/init.log'

diff --git a/defaults/linuxrc b/defaults/linuxrc
index 8356aea..ebed7d8 100644
--- a/defaults/linuxrc
+++ b/defaults/linuxrc
@@ -289,6 +289,24 @@ do
                        fi
                        unset tmp_disabled
                ;;
+               gk.emergency=*)
+                       tmp_action=${x#*=}
+                       case "${tmp_action}" in
+                               reboot)
+                                       GK_EMERGENCY_ACTION="reboot -f"
+                                       ;;
+                               poweroff)
+                                       GK_EMERGENCY_ACTION="poweroff -f"
+                                       ;;
+                               halt)
+                                       GK_EMERGENCY_ACTION="halt -f"
+                                       ;;
+                               *)
+                                       warn_msg "'${x}' is an unsupported emergency action -- ignored!"
+                                       ;;
+                       esac
+                       unset tmp_action
+               ;;
                gk.hw.load-all=*)
                        tmp_disabled=${x#*=}
                        if is_true "${tmp_disabled}"
@@ -1341,28 +1359,39 @@ fi
 
 # init_opts is set in the environment by the kernel when it parses the command line
 init=${REAL_INIT:-/sbin/init}
-if ! mountpoint "${CHROOT}" 1>/dev/null 2>&1
-then
-       bad_msg "${CHROOT} was not a mountpoint"
-elif chroot "${CHROOT}" test ! -x /${init#/}
-then
-       bad_msg "init=${init} does not exist in the rootfs!"
-elif [ $$ != 1 ]
-then
-       bad_msg "PID was not 1! switch_root would fail"
-else
-       good_msg "Switching to real root: switch_root ${CHROOT} ${init} ${init_opts}"
-       exec switch_root "${CHROOT}" "${init}" ${init_opts}
-fi
 
-# If we get here, something bad has happened
-splash 'verbose'
+while true
+do
+       # switch_root can only be called from PID 1;
+       # So stay in loop as long as user is able
+       # to fix the problem.
 
-bad_msg "A fatal error has occured since ${init} did not"
-bad_msg "boot correctly. Trying to open a shell ..."
+       if  ! mountpoint "${CHROOT}" 1>/dev/null 2>&1
+       then
+               bad_msg "${CHROOT} is not a mountpoint; Was root device (${REAL_ROOT}) not mounted?"
+       elif ! chroot "${CHROOT}" test -x /${init#/} 1>/dev/null 2>&1
+       then
+               mounted_root_device=$(mountpoint -n /newroot 2>/dev/null | awk '{ print $1 }')
+               bad_msg "init (${init}) not found in mounted root device (${mounted_root_device})!"
+       else
+               break
+       fi
+
+       run_emergency_shell
+       if ! is_userinteraction_allowed
+       then
+               ${GK_EMERGENCY_ACTION} || exit 1
+       fi
+done
 
-exec /bin/bash
-exec /bin/sh
-exec /bin/ash
-exec /bin/dash
-exec sh
+good_msg "Switching to real root: switch_root ${CHROOT} ${init} ${init_opts}"
+exec switch_root "${CHROOT}" "${init}" ${init_opts}
+
+# If we reached here, something went very badly wrong in the initramfs.
+# However, spawning a rescue shell at this point would not help anymore:
+#   1) We have to assume that switch_root has already messed with
+#      initramfs (i.e. we probably have no /dev anymore).
+#   2) Any shell we would spawn would become child of PID 1 which would
+#      prevent user from calling switch_root once the user believes the
+#      problem was fixed.
+${GK_EMERGENCY_ACTION} || exit 1

diff --git a/doc/genkernel.8.txt b/doc/genkernel.8.txt
index b75f979..44f3d59 100644
--- a/doc/genkernel.8.txt
+++ b/doc/genkernel.8.txt
@@ -832,6 +832,12 @@ when not set. This will allow remote user to provide answer through
     will load that font. This boolean option allows you to disable
     loading of the user embedded bootfont.
 
+*gk.emergency*=<reboot|poweroff|halt>::
+    By default, genkernel tries to spawn a rescue shell in case of
+    a critical failure (see *gk.userinteraction.disabled* to prevent this).
+    This option controls the action to execute when rescue is not possible.
+    Default is "halt".
+
 *gk.hw.load-all*=<...>::
     By default, genkernel loads various module groups (nvme, sata,
     scsi, pata, usb...) until block device specified in *root* parameter
@@ -856,7 +862,7 @@ or desired, e.g. for a kiosk system, this boolean option will disable
 any prompting, including the rescue shell.
 
 NOTE: Because no user interaction is possible when this option is set,
-system will automatically reboot on error after a timeout.
+on error, system will immediately execute *gk.emergency* action.
 
 *noload*=<...>::
 List of modules to skip loading.

Reply via email to