Volans has uploaded a new change for review.
https://gerrit.wikimedia.org/r/307482
Change subject: Reimaging: add option to reboot after the reimage
......................................................................
Reimaging: add option to reboot after the reimage
- Add a new option (-a, --reboot-after) to reboot the machine after the
reimage as a normal reboot (no PXE).
- Fix some ShellCheck errors and make variable referencing uniform.
Bug: T143536
Change-Id: I836b6be25e92de7a6cc85a79a5e86d6eb2c7dcce
---
M modules/puppetmaster/files/wmf-reimage
1 file changed, 93 insertions(+), 63 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/82/307482/1
diff --git a/modules/puppetmaster/files/wmf-reimage b/modules/puppetmaster/files/wmf-reimage
index 25ba300..2275efa 100755
--- a/modules/puppetmaster/files/wmf-reimage
+++ b/modules/puppetmaster/files/wmf-reimage
@@ -2,37 +2,43 @@
# Helper script for reimaging a server.
# Author: Giuseppe Lavagetto
# Copyright (c) 2014-2016 the Wikimedia Foundation
+
set -e
set -u
+
SLEEPTIME=60
FORCE=0
NOCLEAN=0
NOREBOOT=0
RUNPUPPET=1
+REBOOT_AFTER=0
+__IPMI_PASSWORD=""
+
function log {
echo "$@"
}
function clean_puppet {
- nodename=${1}
+ nodename="${1}"
log "cleaning puppet certificate for ${nodename}"
- puppet cert clean ${nodename}
+ puppet cert clean "${nodename}"
# An additional, paranoid check.
- if puppet cert list --all | fgrep -q ${nodename}; then
+ if puppet cert list --all | fgrep -q "${nodename}"; then
log "unable to clean puppet cert, please check manually"
log "Maybe you need to use the -n switch?"
exit 1
fi
log "cleaning puppet facts cache for ${nodename}"
- /usr/local/sbin/puppetstoredconfigclean.rb ${nodename}
+ /usr/local/sbin/puppetstoredconfigclean.rb "${nodename}"
}
function __salt_key {
# Executes a salt key action via salt-call
- local action=${1}
- local nodename=${2}
- local expected=${3}
- local retval=$(salt-call --log-level=warning --output=json publish.runner
"keys.${action}" ${nodename} | jq --monochrome-output ".local.status ==
\"${expected}\"")
+ local action="${1}"
+ local nodename="${2}"
+ local expected="${3}"
+ local retval
+ retval=$(salt-call --log-level=warning --output=json publish.runner
"keys.${action}" "${nodename}" | jq --monochrome-output ".local.status ==
\"${expected}\"")
if [ "${retval}" == "true" ]; then
return 0
else
@@ -41,13 +47,13 @@
}
function clean_salt {
- nodename=${1}
- force_yes=${2}
+ nodename="${1}"
+ force_yes="${2}"
log "cleaning salt key cache for ${nodename}"
# delete the key only if it has been accepted already, we are going to
# ask confirmation later about unaccepted keys
- if __salt_key status ${nodename} "accepted"; then
- if ! __salt_key delete ${nodename} "done"; then
+ if __salt_key status "${nodename}" "accepted"; then
+ if ! __salt_key delete "${nodename}" "done"; then
log "unable to clean salt key, please check manually"
log "Maybe you need to use the -n switch?"
exit 1
@@ -56,63 +62,69 @@
}
function sign_puppet {
- nodename=${1}
- force_yes=${2}
+ nodename="${1}"
+ force_yes="${2}"
log "Seeking the Puppet certificate to sign"
while true; do
- res=$(puppet cert list | sed -ne "s/\"$nodename\"//p")
- if [ "x${res}" == "x" ]; then
+ res="$(puppet cert list | sed -ne "s/\"$nodename\"//p")"
+ if [ -z "${res}" ]; then
#log "cert not found, sleeping for ${SLEEPTIME}s"
echo -n "."
- sleep $SLEEPTIME
+ sleep "${SLEEPTIME}"
continue
fi
echo "+"
- if [ ${force_yes} -eq 0 ]; then
+ if [ "${force_yes}" -eq "0" ]; then
echo "We have found a key for ${nodename} " \
"with the following fingerprint:"
echo "$res"
echo -n "Can we go on and sign it? (y/N) "
- read choice
+ read -r choice
echo
- if [ "x${choice}" != "xy" ]; then
+ if [ "${choice}" != "y" ]; then
log "Aborting on user request."
exit 1
fi
fi
- puppet cert -s ${nodename}
+ puppet cert -s "${nodename}"
break
done
}
function sign_salt {
- nodename=${1}
- force_yes=${2}
+ nodename="${1}"
+ force_yes="${2}"
+
log "Seeking the SALT node key to add"
log "This is the time to start a puppet run on the host."
+
while true; do
- if __salt_key status ${nodename} "missing"; then
+ if __salt_key status "${nodename}" "missing"; then
echo -n "."
- sleep $SLEEPTIME
+ sleep ${SLEEPTIME}
continue
- elif __salt_key status ${nodename} "pending"; then
- if __salt_key accept ${nodename} "done"; then
+ elif __salt_key status "${nodename}" "pending"; then
+ if __salt_key accept "${nodename}" "done"; then
echo "+"
break
else
echo -n '|'
- sleep $SLEEPTIME
+ sleep ${SLEEPTIME}
fi
- elif __salt_key status ${nodename} "accepted"; then
+ elif __salt_key status "${nodename}" "accepted"; then
echo "+"
break
fi
done
}
-function set_pxe_and_reboot {
- mgmtname=${1}
- if [ -z "${IPMI_PASSWORD:-}" ]; then
+function ensure_ipmi_password {
+ mgmtname="${1}"
+ # Check if the IPMI_PASSWORD was set in the environment
+ # shellcheck disable=2153
+ if [ -n "${IPMI_PASSWORD}" ]; then
+ __IPMI_PASSWORD="${IPMI_PASSWORD}"
+ else
echo "WARNING: IPMI_PASSWORD not found."
if ! tty -s; then
echo "Assuming bash, do: "
@@ -121,77 +133,95 @@
echo "WARNING: Continuing without auto rebooting the box"
return
fi
- read -s -p "IPMI password: "
- IPMI_PASSWORD=$REPLY
+ read -r -s -p "IPMI password: "
+ __IPMI_PASSWORD="${REPLY}"
fi
- IPMI_PASSWORD=$IPMI_PASSWORD ipmitool -I lanplus -H ${mgmtname} -U root -E
chassis bootdev pxe
- IPMI_PASSWORD=$IPMI_PASSWORD ipmitool -I lanplus -H ${mgmtname} -U root -E
chassis power cycle
+}
+
+function set_pxe {
+ ensure_ipmi_password "${mgmtname}"
+ IPMI_PASSWORD="${__IPMI_PASSWORD}" ipmitool -I lanplus -H "${mgmtname}" -U
root -E chassis bootdev pxe
+}
+
+function reboot_machine {
+ ensure_ipmi_password "${mgmtname}"
+ IPMI_PASSWORD="${__IPMI_PASSWORD}" ipmitool -I lanplus -H "${mgmtname}" -U
root -E chassis power cycle
}
function enable_and_run_puppet {
- nodename=${1}
+ nodename="${1}"
log "Stopping default puppet agent and enabling puppet"
- ssh -4 -i /root/.ssh/new_install -o "StrictHostKeyChecking=no" -o
"UserKnownHostsFile=/dev/null" -o "GlobalKnownHostsFile=/dev/null" ${nodename}
"service puppet stop; puppet agent --enable"
+ ssh -4 -i /root/.ssh/new_install -o "StrictHostKeyChecking=no" -o
"UserKnownHostsFile=/dev/null" -o "GlobalKnownHostsFile=/dev/null"
"${nodename}" "service puppet stop; puppet agent --enable"
log "Spawning the first puppet run as well"
- ssh -4 -q -i /root/.ssh/new_install -o "StrictHostKeyChecking=no" -o
"UserKnownHostsFile=/dev/null" -o "GlobalKnownHostsFile=/dev/null" ${nodename}
"puppet agent -t" > ${nodename}.puppetrun.log 2>&1 &
+ ssh -4 -q -i /root/.ssh/new_install -o "StrictHostKeyChecking=no" -o
"UserKnownHostsFile=/dev/null" -o "GlobalKnownHostsFile=/dev/null"
"${nodename}" "puppet agent -t" > "${nodename}.puppetrun.log" 2>&1 &
log "The first puppet run is ongoing, you can see what the result is in
the file ${PWD}/${nodename}.puppetrun.log"
}
function usage {
- echo "Usage: $0 [OPTIONS] <nodename> <mgmtname> [<newnodename>]"
+ echo "Usage: ${0} [OPTIONS] <nodename> <mgmtname> [<newnodename>]"
echo " -y, --yes don't prompt for confirmation"
echo " -r, --no-reboot don't reboot"
echo " -n, --no-clean don't clean machine puppet/salt keys"
echo " -p, --no-puppet don't run puppet after reimage"
+ echo " -a, --reboot-after reboot after the reimage"
echo " -s, --sleep=SECONDS sleep for SECONDS while waiting for keys"
- echo " (default: $SLEEPTIME)"
+ echo " (default: ${SLEEPTIME})"
exit 1
}
## Main script
-TEMP=$(getopt -o yrnps: --long yes,no-reboot,no-clean,no-puppet,sleep: \
- -n "$0" -- "$@")
-if [ $? != 0 ] ; then echo "Terminating..." >&2 ; exit 1 ; fi
+TEMP=$(getopt -o yrnpas: --long
yes,no-reboot,no-clean,no-puppet,reboot-after,sleep: \
+ -n "${0}" -- "${@}")
+if [ "${?}" -ne "0" ]; then
+ echo "Terminating..." >&2
+ exit 1
+fi
-eval set -- "$TEMP"
+eval set -- "${TEMP}"
while true; do
- case "$1" in
+ case "${1}" in
-y|--yes) FORCE=1; shift ;;
-r|--no-reboot) NOREBOOT=1; shift ;;
-n|--no-clean) NOCLEAN=1; shift ;;
-p|--no-puppet) RUNPUPPET=0; shift ;;
- -s|--sleep) SLEEPTIME=$2; shift 2 ;;
+ -a|--reboot-after) REBOOT_AFTER=1; shift;;
+ -s|--sleep) SLEEPTIME="${2}"; shift 2 ;;
--) shift ; break ;;
*) echo "Internal error!"; exit 1 ;;
esac
done
-nodename=${1:-}
-mgmtname=${2:-}
-newnodename=${3:-}
-test -z ${nodename} && usage
-test -z ${mgmtname} && usage
-if [ -z ${newnodename} ]; then
+nodename="${1:-}"
+mgmtname="${2:-}"
+newnodename="${3:-}"
+test -z "${nodename}" && usage
+test -z "${mgmtname}" && usage
+if [ -z "${newnodename}" ]; then
log "Preparing reimaging of node ${nodename}"
else
log "Preparing reimaging of node ${nodename} to new hostname
${newnodename} (interface should already be on new network if applicable)"
fi
-if [ $NOCLEAN -eq 0 ]; then
- clean_puppet $nodename
- clean_salt $nodename $FORCE
+if [ "${NOCLEAN}" -eq "0" ]; then
+ clean_puppet "${nodename}"
+ clean_salt "${nodename}" "${FORCE}"
fi;
-test $NOREBOOT -eq 0 && set_pxe_and_reboot $mgmtname
+test "${NOREBOOT}" -eq "0" && set_pxe "${mgmtname}" && reboot_machine
"${mgmtname}"
-if [ ! -z ${newnodename} ]; then
- nodename=$newnodename
+if [ -n "${newnodename}" ]; then
+ nodename="${newnodename}"
fi
-sign_puppet $nodename $FORCE
-if [ $RUNPUPPET -eq 1 ]; then
- enable_and_run_puppet $nodename
+sign_puppet "${nodename}" "${FORCE}"
+if [ "${RUNPUPPET}" -eq "1" ]; then
+ enable_and_run_puppet "${nodename}"
fi;
-sign_salt $nodename $FORCE
+sign_salt "${nodename}" "${FORCE}"
log "Node ${nodename} is now signed and both puppet and salt should work."
+
+if [ "${REBOOT_AFTER}" -eq "1" ]; then
+ log "Rebooting the machine after everything."
+ reboot_machine "${mgmtname}"
+fi
--
To view, visit https://gerrit.wikimedia.org/r/307482
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I836b6be25e92de7a6cc85a79a5e86d6eb2c7dcce
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Volans <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits