The branch, 1.2 has been updated
       via  d0b790de08b9c9226ac9848b434a051d99a41dd8 (commit)
      from  62179c80701cd4f773afaba54cde577de234f72d (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=1.2


- Log -----------------------------------------------------------------
commit d0b790de08b9c9226ac9848b434a051d99a41dd8
Author: Ronnie Sahlberg <ronniesahlb...@gmail.com>
Date:   Wed Nov 17 13:50:56 2010 +1100

    add a new support function ctdb_check_counter_equal()
    
    update nfs to try to restart the service after 10 consecutive failures
    and to flag the node unhealthy after 15
    
    add similar function to mountd

-----------------------------------------------------------------------

Summary of changes:
 config/events.d/60.nfs |   34 ++++++++++++++++++++++++++++++----
 config/functions       |   13 +++++++++++++
 2 files changed, 43 insertions(+), 4 deletions(-)


Changeset truncated at 500 lines:

diff --git a/config/events.d/60.nfs b/config/events.d/60.nfs
index 038adbb..8889cad 100755
--- a/config/events.d/60.nfs
+++ b/config/events.d/60.nfs
@@ -78,15 +78,20 @@ case "$1" in
                (
                        service_name="nfs_knfsd"
                        ctdb_counter_incr
-                       ctdb_check_counter_limit 10 quiet >/dev/null
+
+                       ctdb_check_counter_equal 10 || {
+                               echo "Trying to restart NFS service"
+                               startstop_nfs restart >/dev/null 2>&1 &
+                               exit 0
+                       }
+
+                       ctdb_check_counter_limit 15 quiet >/dev/null
                ) || {
                        echo "$ctdb_check_rpc_out"
                        echo "Trying to restart NFS service"
                        startstop_nfs restart
                        exit 1
                }
-               # we haven't hit the failure limit so restart quietly
-               startstop_nfs restart >/dev/null 2>&1 &
            fi
        }
 
@@ -107,7 +112,25 @@ case "$1" in
 
        # mount needs special handling since it is sometimes not started
        # correctly on RHEL5
-       ctdb_check_rpc "MOUNTD" 100005 1 || {
+       if ctdb_check_rpc "MOUNTD" 100005 1 >/dev/null ; then
+               (service_name="nfs_mountd"; ctdb_counter_init)
+       else
+       (
+               service_name="nfs_mountd"
+               ctdb_counter_incr
+
+               ctdb_check_counter_equal 5 || {
+                       p="rpc.mountd"
+                       cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
+                       echo "Trying to restart MOUNTD [${cmd}]"
+                       killall -q -9 $p
+                       $cmd &
+                       exit 0
+               }
+
+               ctdb_check_counter_limit 10 quiet >/dev/null
+       ) || {
+               echo "$ctdb_check_rpc_out"
                p="rpc.mountd"
                cmd="${p}${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
                echo "Trying to restart MOUNTD [${cmd}]"
@@ -115,6 +138,9 @@ case "$1" in
                $cmd &
                exit 1
        }
+       fi
+
+
        # rquotad needs special handling since it is sometimes not started
        # correctly on RHEL5
        # this is not a critical service so we dont flag the node as unhealthy
diff --git a/config/functions b/config/functions
index 610085b..4acfc4f 100755
--- a/config/functions
+++ b/config/functions
@@ -571,6 +571,19 @@ ctdb_check_counter_limit () {
        echo "WARNING: less than $_limit consecutive failures ($_size) for $service_name, not unhealthy yet"
     fi
 }
+ctdb_check_counter_equal () {
+    _ctdb_counter_common
+
+    _limit=$1
+
+    # unary counting!
+    _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
+    if [ $_size -eq $_limit ] ; then
+       return 1
+    fi
+    return 0
+}
+
 ########################################################
 
 ctdb_spool_dir="/var/spool/ctdb"


-- 
CTDB repository

Reply via email to