Hi

I fixed most of the things Lars mentioned in
http://hg.linux-ha.org/dev/rev/15bcf3491f9c and will explain why I did
not fix some of them. ocf-tester runs fine with the RA.

> # FIXME: This should use pidofproc

pidofproc is not available everywhere and is not able to get down to
command line options, eg could not tell the difference between "$process
$option_a" and "$process $option_b" which I wanted to support with this
agent.

Example:
dktest3:~/src/linuxha/hg/dev # sleep 200 &
[1] 5799
dktest3:~/src/linuxha/hg/dev # sleep 300 &
[2] 5801
dktest3:~/src/linuxha/hg/dev # pidofproc sleep
5801 5799
dktest3:~/src/linuxha/hg/dev # pidofproc "sleep 300"

> # FIXME: use start_daemon

start_daemon is not available everywhere either.

> # FIXME: What about daemons which can manage their own pidfiles?

This agent is meant to be used for programs that are not actually
daemons by design. It is meant to be able to run sth "stupid" in the
cluster. Even like "/bin/sleep 10000000000000000000"

> # FIXME: use killproc

This is also a problem with "$process $option_a" and "$process
$option_b". You can't just "killproc $process" then.

> # FIXME: Attributes special meaning to the resource id

I tried to, but couldn't understand what you meant here.

I also talked to Dejan on IRC and we agreed that "anything" is a bad
name for the RA and the changeset description was propably bad, too.
This RA is not for (as the cs stated) "arbitrary daemons", it is more
for daemonizing programs which were not meant to be daemons.

If a proper name comes to anyone's mind - please share.

Hopefully, now it is a bit clearer what I wanted to be able to do with
this RA. I agree the "cmd=" lines and pid file creation are very very
ugly, but I could not yet find a better way. Not that much of a shell
genius I guess :( Please share if you can improve things.

Regards
Dominik
exporting patch:
# HG changeset patch
# User Dominik Klein <d...@in-telegence.net>
# Date 1234350091 -3600
# Node ID 04533b37813c8be009814f52de7b14ff65bf9862
# Parent  90ff997faa7288248ac57583b0c03df4c8e41bda
RA: anything. Implement most of lmbs suggestions.

diff -r 90ff997faa72 -r 04533b37813c resources/OCF/anything
--- a/resources/OCF/anything	Wed Feb 11 11:31:02 2009 +0100
+++ b/resources/OCF/anything	Wed Feb 11 12:01:31 2009 +0100
@@ -32,6 +32,7 @@
 #       OCF_RESKEY_errlogfile
 #       OCF_RESKEY_user
 #       OCF_RESKEY_monitor_hook
+#       OCF_RESKEY_stop_timeout
 #
 # This RA starts $binfile with $cmdline_options as $user and writes a $pidfile from that. 
 # If you want it to, it logs:
@@ -47,18 +48,20 @@
 # Initialization:
 . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs
 
-getpid() { # make sure that the file contains a number
-	# FIXME: pidfiles could contain spaces
-	grep '^[0-9][0-9]*$' $1
+getpid() {
+        grep -o '[0-9]*' $1
 }
 
 anything_status() {
-	# FIXME: This should use pidofproc
-	# FIXME: pidfile w/o process means the process died, so should
-	# be ERR_GENERIC
-	if test -f "$pidfile" && pid=`getpid $pidfile` && kill -0 $pid
+	if test -f "$pidfile"
 	then
-		return $OCF_RUNNING
+		if pid=`getpid $pidfile` && kill -0 $pid
+		then
+			return $OCF_RUNNING
+		else
+			# pidfile w/o process means the process died
+			return $OCF_ERR_GENERIC
+		fi
 	else
 		return $OCF_NOT_RUNNING
 	fi
@@ -66,8 +69,6 @@
 
 anything_start() {
 	if ! anything_status
-	# FIXME: use start_daemon
-	# FIXME: What about daemons which can manage their own pidfiles?
 	then
 		if [ -n "$logfile" -a -n "$errlogfile" ]
 		then
@@ -101,29 +102,48 @@
 }
 
 anything_stop() {
-	# FIXME: use killproc
+        if [ -n "$OCF_RESKEY_stop_timeout" ]
+        then
+                stop_timeout=$OCF_RESKEY_stop_timeout
+                elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
+                        # Allow 2/3 of the action timeout for the orderly shutdown
+                        # (The origin unit is ms, hence the conversion)
+                        stop_timeout=$((OCF_RESKEY_CRM_meta_timeout/1500))
+        else
+                stop_timeout=10
+        fi
 	if anything_status
 	then
-		pid=`getpid $pidfile`
-		kill $pid
-		i=0
-		# FIXME: escalate to kill -9 before timeout
-		while sleep 1 
-		do
-			if ! anything_status
-			then
-				rm -f $pidfile > /dev/null 2>&1
-				return $OCF_SUCCESS
-			fi
-			let "i++"
-		done
+                pid=`getpid $pidfile`
+                kill $pid
+                rm -f $pidfile
+                i=0
+                while [ $i -lt $stop_timeout ]
+                do
+                        while sleep 1 
+                        do
+                                if ! anything_status
+                                then
+                                        return $OCF_SUCCESS
+                                fi
+                                let "i++"
+                        done
+                done
+                ocf_log warn "Stop with SIGTERM failed/timed out, now sending SIGKILL."
+                kill -9 $pid
+                if ! anything_status
+                then
+                        ocf_log warn "SIGKILL did the job."
+                        return $OCF_SUCCESS
+                else
+                        ocf_log err "Failed to stop - even with SIGKILL."
+                        return $OCF_ERR_GENERIC
+                fi
 	else
 		# was not running, so stop can be considered successful
 		rm -f $pidfile 
 		return $OCF_SUCCESS
 	fi
-	# FIXME: Never reached.
-	return $OCF_ERR_GENERIC
 }
 
 anything_monitor() {
@@ -131,12 +151,12 @@
 	ret=$?
 	if [ $ret -eq $OCF_SUCCESS ]
 	then
-		# implement your deeper monitor operation here
 		if [ -n "$OCF_RESKEY_monitor_hook" ]; then
 			eval "$OCF_RESKEY_monitor_hook"
-			# FIXME: Implement a check that this doesn't
-			# accidentially return NOT_RUNNING?
-			return
+                        if [ $? -ne $OCF_SUCCESS ]; then
+                                return ${OCF_ERR_GENERIC}
+                        fi
+			return $OCF_SUCCESS
 		else
 			true
 		fi
@@ -150,19 +170,16 @@
 binfile="$OCF_RESKEY_binfile"
 cmdline_options="$OCF_RESKEY_cmdline_options"
 pidfile="$OCF_RESKEY_pidfile"
-# FIXME: Why test for $binfile here?
-[ -z "$pidfile" -a -n "$binfile" ] && pidfile=${HA_VARRUN}/anything_${process}.pid
+[ -z "$pidfile" ] && pidfile=${HA_VARRUN}/anything_${process}.pid
 logfile="$OCF_RESKEY_logfile"
 errlogfile="$OCF_RESKEY_errlogfile"
 user="$OCF_RESKEY_user"
 [ -z "$user" ] && user=root
 
 anything_validate() {
-	# FIXME: Actually this needs to test from the point of view of
-	# the user.
-	if [ ! -x "$binfile" ]
+	if ! su - $user -c "test -x $binfile"
 	then
-		ocf_log err "binfile $binfile does not exist or is not executable."
+		ocf_log err "binfile $binfile does not exist or is not executable by $user."
 		exit $OCF_ERR_INSTALLED
 	fi
 	if ! getent passwd $user >/dev/null 2>&1
@@ -243,6 +260,14 @@
 <shortdesc lang="en">Command to run in monitor operation</shortdesc>
 <content type="string"/>
 </parameter>
+<parameter name="stop_timeout">
+<longdesc lang="en">
+In the stop operation: Seconds to wait for kill -TERM to succeed
+before sending kill -SIGKILL. Defaults to 2/3 of the stop operation timeout.
+</longdesc>
+<shortdesc lang="en">Seconds to wait after having sent SIGTERM before sending SIGKILL in stop operation</shortdesc>
+<content type="string" default=""/>
+</parameter>
 </parameters>
 <actions>
 <action name="start"   timeout="90" />
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/

Reply via email to