The branch, master has been updated
       via  1569a54bb82fc433895ed68f816cf48399ad9d40 (commit)
      from  1d77a3adfff893b3c01b87f791e72c0d3148425c (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 1569a54bb82fc433895ed68f816cf48399ad9d40
Author: Ronnie Sahlberg <ronniesahlb...@gmail.com>
Date:   Wed Nov 17 13:50:56 2010 +1100

    add a new support function ctdb_check_counter_equal()

    update nfs to try to restart the service after 10 consecutive failures
    and to flag the node unhealthy after 15

    add similar function to mountd

-----------------------------------------------------------------------

Summary of changes:
 config/events.d/60.nfs |   34 ++++++++++++++++++++++++++++++----
 config/functions       |   13 +++++++++++++
 2 files changed, 43 insertions(+), 4 deletions(-)


Changeset truncated at 500 lines:

diff --git a/config/events.d/60.nfs b/config/events.d/60.nfs
index 038adbb..8889cad 100755
--- a/config/events.d/60.nfs
+++ b/config/events.d/60.nfs
@@ -78,15 +78,20 @@ case "$1" in
             (
                 service_name="nfs_knfsd"
                 ctdb_counter_incr
-                ctdb_check_counter_limit 10 quiet >/dev/null
+
+                ctdb_check_counter_equal 10 || {
+                    echo "Trying to restart NFS service"
+                    startstop_nfs restart >/dev/null 2>&1 &
+                    exit 0
+                }
+
+                ctdb_check_counter_limit 15 quiet >/dev/null
             ) || {
                 echo "$ctdb_check_rpc_out"
                 echo "Trying to restart NFS service"
                 startstop_nfs restart
                 exit 1
             }
-            # we haven't hit the failure limit so restart quietly
-            startstop_nfs restart >/dev/null 2>&1 &
         fi
     }
 
@@ -107,7 +112,25 @@ case "$1" in
 
     # mount needs special handling since it is sometimes not started
     # correctly on RHEL5
-    ctdb_check_rpc "MOUNTD" 100005 1 || {
+    if ctdb_check_rpc "MOUNTD" 100005 1 >/dev/null ; then
+        (service_name="nfs_mountd"; ctdb_counter_init)
+    else
+    (
+        service_name="nfs_mountd"
+        ctdb_counter_incr
+
+        ctdb_check_counter_equal 5 || {
+            p="rpc.mountd"
+            cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
+            echo "Trying to restart MOUNTD [${cmd}]"
+            killall -q -9 $p
+            $cmd &
+            exit 0
+        }
+
+        ctdb_check_counter_limit 10 quiet >/dev/null
+    ) || {
+        echo "$ctdb_check_rpc_out"
         p="rpc.mountd"
         cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
         echo "Trying to restart MOUNTD [${cmd}]"
@@ -115,6 +138,9 @@ case "$1" in
         $cmd &
         exit 1
     }
+    fi
+
+
     # rquotad needs special handling since it is sometimes not started
     # correctly on RHEL5
     # this is not a critical service so we dont flag the node as unhealthy
diff --git a/config/functions b/config/functions
index 610085b..4acfc4f 100755
--- a/config/functions
+++ b/config/functions
@@ -571,6 +571,19 @@ ctdb_check_counter_limit () {
     echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
     fi
 }
+ctdb_check_counter_equal () {
+    _ctdb_counter_common
+
+    _limit=$1
+
+    # unary counting!
+    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
+    if [ $_size -eq $_limit ] ; then
+        return 1
+    fi
+    return 0
+}
+
 ########################################################
 
 ctdb_spool_dir="/var/spool/ctdb"


-- 
CTDB repository