Hi, Dejan
I fixed the spelling mistakes in the oracle/oralsnr patch
and restored the status operation (please see attached).
I'd also like to confirm: which log messages in the
monitor operations are unnecessary?
Best regards,
NAKAHIRA Kazutomo
(2010/02/11 19:51), Dejan Muhamedagic wrote:
Hi Kazutomo-san,
On Wed, Feb 10, 2010 at 11:11:42AM +0900, nakah...@intellilink.co.jp wrote:
Hi, Dejan
Thank you for your comments on my patch.
And sorry for not getting back to you earlier.
I agree with all your points.
Could you commit the patches to the agents repository?
It turns out that the patches need quite a bit of work and I'm
afraid I can't afford to spend the time right now. Can you please
do that yourself: fix spelling mistakes, don't log unnecessarily
in the monitor operations, and the status operation should output
to stdout and not log.
Thanks,
Dejan
Best Regards,
NAKAHIRA Kazutomo
Quoting Dejan Muhamedagic<deja...@fastmail.fm>:
Hi Kazutomo-san,
On Fri, Jan 22, 2010 at 06:31:15PM +0900, NAKAHIRA Kazutomo wrote:
Hi, all
I improved logging of the oracle/oralsnr RA.
This patch aims to record the execution result of sqlplus
(and other commands) and to output a detailed failure log using ocf_log
when a problem occurs in an RA operation.
Thanks for the patch. There are a few spelling problems, but I'll
fix those. Also, the status operation must output to stdout,
that's not meant for the logs.
Cheers,
Dejan
Best Regards,
NAKAHIRA Kazutomo
--
----------------------------------------
NAKAHIRA Kazutomo
NTT DATA INTELLILINK CORPORATION
Open Source Business Unit
Software Services Integration Business Division
# HG changeset patch
# User r...@prec370b
# Date 1264145021 -32400
# Node ID 0ecef9560522601936888ce168dae5f563662402
# Parent 3024963150433960c51aa1bdccde39839efb09b7
oracle: improve logging
diff -r 302496315043 -r 0ecef9560522 heartbeat/oracle
--- a/heartbeat/oracle Thu Jan 21 16:42:40 2010 +0100
+++ b/heartbeat/oracle Fri Jan 22 16:23:41 2010 +0900
@@ -231,11 +231,23 @@ ora_info() {
testoraenv() {
# Let's make sure a few important things are set...
- [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] ||
+ if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then
+ ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL.
ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER."
return 1
+ fi
# and some important things are there
- [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] ||
+ if [ ! -x "$sqlplus" ]; then
+ ocf_log err "Executeble sqlplus command($sqlplus) dose not
exist."
return 1
+ fi
+ if [ ! -x "$lsnrctl" ]; then
+ ocf_log err "Executeble lsnrctl command($lsnrctl) dose not
exist."
+ return 1
+ fi
+ if [ ! -x "$tnsping" ]; then
+ ocf_log err "Executeble tnsping command($tnsping) dose not
exist."
+ return 1
+ fi
return 0
}
@@ -367,13 +379,20 @@ showdbstat() {
# Part 1: Oracle
dumpinstipc() {
local dumpdest=`dbasql getdumpdest`
- [ "x$dumpdest" != x -a -d "$dumpdest" ] || return 1
+ if [ "x$dumpdest" == x -o ! -d "$dumpdest" ]; then
+ ocf_log warn "dumpdest($dumpdest) is not a regular directory."
+ return 1
+ fi
local -i fcount=`ls -rt $dumpdest | wc -l`
- dbasql getipc>/dev/null 2>&1
+ output=`dbasql getipc`
local lastf=`ls -rt $dumpdest | grep -v '^\.*$' | tail -1`
local -i fcount2=`ls -rt $dumpdest | wc -l`
- [ $((fcount+1)) -eq $fcount2 ] || return 1 # more than one file created
- echo $dumpdest/$lastf
+ if [ $((fcount+1)) -eq $fcount2 ]; then
+ echo $dumpdest/$lastf
+ else
+ ocf_log warn "dumpinstipc failed bacause the number of output
files is wrong. before dump file count=$fcount, after dump file
count=$fcount2, getipc result=$output"
+ return 1
+ fi
}
parseipc() {
local inf=$1
@@ -440,7 +459,13 @@ is_oracle_up() {
}
# instance in OPEN state?
instance_live() {
- [ "`dbasql dbstat`" = OPEN ]
+ output=`dbasql dbstat`
+ if [ "$output" = OPEN ]; then
+ return 0
+ else
+ ocf_log info "Instance state is not OPEN. dbstat result=$output"
+ return 1
+ fi
}
ora_cleanup() {
@@ -498,6 +523,7 @@ oracle_start() {
# try to cleanup in case of
# ORA-01081: cannot start already-running ORACLE - shut it down
first
if echo "$output" | grep ORA-01081>/dev/null 2>&1; then
+ ocf_log info "ORA-01081 error was found. try to cleanup
oracle.
DB start output=$output"
ora_cleanup
output=`dbasql dbstart_mount`
fi
@@ -510,7 +536,7 @@ oracle_start() {
;;
*)
: error!!
- ocf_log error "Oracle $ORACLE_SID can not mount."
+ ocf_log error "Oracle $ORACLE_SID can not mount. DB
status=$status, DB start output=$output"
return $OCF_ERR_GENERIC
;;
esac
@@ -523,13 +549,16 @@ oracle_start() {
fi
output=`dbasql dbopen`
- if is_oracle_up&& instance_live; then
+ if ! is_oracle_up; then
+ ocf_log err "Oracle process is not started: $output"
+ return $OCF_ERR_GENERIC
+ elif ! instance_live; then
+ ocf_log err "Oracle instance $ORACLE_SID not started: $output"
+ return $OCF_ERR_GENERIC
+ else
: cool, we are up and running
ocf_log info "Oracle instance $ORACLE_SID started: $output"
return $OCF_SUCCESS
- else
- ocf_log err "Oracle instance $ORACLE_SID not started: $output"
- return $OCF_ERR_GENERIC
fi
}
@@ -562,13 +591,21 @@ killprocs() {
killprocs() {
local sig=$1
shift 1
- kill -$sig $*>/dev/null 2>&1
+ # Record stderr
+ kill -$sig $*>/dev/null
}
ora_kill() {
- killprocs TERM `eval $procs | awk '{print $1}'`
+ oraprocs=`eval $procs | awk '{print $1}'`
+ if [ -z "$oraprocs" ]; then
+ ocf_log debug "All oracle processes are already stopped."
+ return
+ fi
+ killprocs TERM $oraprocs
for i in 1 2 3 4 5; do
- killprocs 0 `eval $procs | awk '{print $1}'` ||
+ if [ -z "`eval $procs | awk '{print $1}'`" ]; then
+ ocf_log debug "All oracle processes are killed."
return
+ fi
sleep 5
done
killprocs KILL `eval $procs | awk '{print $1}'`
@@ -578,14 +615,16 @@ ora_kill() {
# oracle_monitor: Can the Oracle instance do anything useful?
#
oracle_monitor() {
- if is_oracle_up&& instance_live
- then
- #ocf_log info "Oracle instance $ORACLE_SID is alive"
- return $OCF_SUCCESS
- else
+ if ! is_oracle_up; then
+ ocf_log info "Oracle process is down"
+ return $OCF_NOT_RUNNING
+ fi
+ if ! instance_live; then
ocf_log info "Oracle instance $ORACLE_SID is down"
return $OCF_NOT_RUNNING
fi
+ #ocf_log info "Oracle instance $ORACLE_SID is alive"
+ return $OCF_SUCCESS
}
#
@@ -675,10 +714,10 @@ case "$1" in
status) if is_oracle_up
then
- echo Oracle instance $ORACLE_SID is running
+ ocf_log info "Oracle instance $ORACLE_SID is running"
exit $OCF_SUCCESS
else
- echo Oracle instance $ORACLE_SID is stopped
+ ocf_log info "Oracle instance $ORACLE_SID is stopped"
exit $OCF_NOT_RUNNING
fi
;;
# HG changeset patch
# User r...@prec370b
# Date 1264145083 -32400
# Node ID b36abb1554500f4bdf33858989205d02606c609d
# Parent 0ecef9560522601936888ce168dae5f563662402
oralsnr: improve logging
diff -r 0ecef9560522 -r b36abb155450 heartbeat/oralsnr
--- a/heartbeat/oralsnr Fri Jan 22 16:23:41 2010 +0900
+++ b/heartbeat/oralsnr Fri Jan 22 16:24:43 2010 +0900
@@ -158,11 +158,23 @@ ora_info() {
testoraenv() {
# Let's make sure a few important things are set...
- [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] ||
+ if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then
+ ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL.
ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER."
return 1
+ fi
# and some important things are there
- [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] ||
+ if [ ! -x "$sqlplus" ]; then
+ ocf_log err "Executeble sqlplus command($sqlplus) dose not
exist."
return 1
+ fi
+ if [ ! -x "$lsnrctl" ]; then
+ ocf_log err "Executeble lsnrctl command($lsnrctl) dose not
exist."
+ return 1
+ fi
+ if [ ! -x "$tnsping" ]; then
+ ocf_log err "Executeble tnsping command($tnsping) dose not
exist."
+ return 1
+ fi
return 0
}
@@ -247,10 +259,17 @@ oralsnr_stop() {
# kill the listener procs
# give them 10 secs to exit cleanly (5 times 2)
oralsnr_kill() {
- killprocs TERM `eval $procs | awk '{print $1}'`
+ oraprocs=`eval $procs | awk '{print $1}'`
+ if [ -z "$oraprocs" ]; then
+ ocf_log debug "All oralsnr processes are already stopped."
+ return
+ fi
+ killprocs TERM $oraprocs
for i in 1 2 3 4 5; do
- killprocs 0 `eval $procs | awk '{print $1}'` ||
+ if [ -z "`eval $procs | awk '{print $1}'`" ]; then
+ ocf_log debug "All oralsnr processes are killed."
return
+ fi
sleep 2
done
killprocs KILL `eval $procs | awk '{print $1}'`
@@ -258,7 +277,8 @@ killprocs() {
killprocs() {
sig=$1
shift 1
- kill -$sig $*>/dev/null 2>&1
+ # Record stderr
+ kill -$sig $*>/dev/null
}
#
@@ -269,11 +289,23 @@ is_oralsnr_up() {
[ x != "x`eval $procs`" ]
}
oralsnr_status() {
- $lsnrctl status $listener | tail -1 | grep -qs 'completed successfully'
+ output=`$lsnrctl status $listener`
+ echo "$output" | tail -1 | grep -qs 'completed successfully'
+ RET=$?
+ if [ $RET -ne 0 ]; then
+ ocf_log info "$listener status failed: $output"
+ fi
+ return $RET
}
# and does it work?
tnsping() {
- $tnsping $ORACLE_SID | tail -1 | grep -qs '^OK'
+ output=`$tnsping $ORACLE_SID`
+ echo "$output" | tail -1 | grep -qs '^OK'
+ RET=$?
+ if [ $RET -ne 0 ]; then
+ ocf_log info "$tnsping $ORACLE_SID failed: $output"
+ fi
+ return $RET
}
#
@@ -285,10 +317,10 @@ oralsnr_monitor() {
: good
#ocf_log info "Listener $listener running"
return $OCF_SUCCESS
- else
+ else
ocf_log info "Listener $listener not running"
return $OCF_NOT_RUNNING
- fi
+ fi
}
#
@@ -368,10 +400,10 @@ case "$1" in
status) if oralsnr_status
then
- echo Listener $listener is running
+ ocf_log info "Listener $listener is running"
exit $OCF_SUCCESS
else
- echo Listener $listener is stopped
+ ocf_log info "Listener $listener is stopped"
exit $OCF_NOT_RUNNING
fi
;;
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
# HG changeset patch
# User NAKAHIRA Kazutomo <nakahira.kazut...@oss.ntt.co.jp>
# Date 1266198104 -32400
# Node ID ee6e1032df4e4f24b3fb19c5f7e1c7f3d0e148a9
# Parent 5737f981ade0c07d78da4a74515f13d190f9833e
oracle: improve logging
diff -r 5737f981ade0 -r ee6e1032df4e heartbeat/oracle
--- a/heartbeat/oracle Wed Feb 10 11:28:34 2010 +0100
+++ b/heartbeat/oracle Mon Feb 15 10:41:44 2010 +0900
@@ -231,11 +231,23 @@
testoraenv() {
# Let's make sure a few important things are set...
- [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] ||
+ if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then
+ ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL.
ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER."
return 1
+ fi
# and some important things are there
- [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] ||
+ if [ ! -x "$sqlplus" ]; then
+ ocf_log err "Executable sqlplus command($sqlplus) dose not
exist."
return 1
+ fi
+ if [ ! -x "$lsnrctl" ]; then
+ ocf_log err "Executable lsnrctl command($lsnrctl) dose not
exist."
+ return 1
+ fi
+ if [ ! -x "$tnsping" ]; then
+ ocf_log err "Executable tnsping command($tnsping) dose not
exist."
+ return 1
+ fi
return 0
}
@@ -367,13 +379,20 @@
# Part 1: Oracle
dumpinstipc() {
local dumpdest=`dbasql getdumpdest`
- [ "x$dumpdest" != x -a -d "$dumpdest" ] || return 1
+ if [ "x$dumpdest" == x -o ! -d "$dumpdest" ]; then
+ ocf_log warn "dumpdest($dumpdest) is not a regular directory."
+ return 1
+ fi
local -i fcount=`ls -rt $dumpdest | wc -l`
- dbasql getipc >/dev/null 2>&1
+ output=`dbasql getipc`
local lastf=`ls -rt $dumpdest | grep -v '^\.*$' | tail -1`
local -i fcount2=`ls -rt $dumpdest | wc -l`
- [ $((fcount+1)) -eq $fcount2 ] || return 1 # more than one file created
- echo $dumpdest/$lastf
+ if [ $((fcount+1)) -eq $fcount2 ]; then
+ echo $dumpdest/$lastf
+ else
+ ocf_log warn "dumpinstipc failed because the number of output
files is wrong. File count before the dump =$fcount, file count after the
dump=$fcount2, getipc result=$output"
+ return 1
+ fi
}
parseipc() {
local inf=$1
@@ -440,7 +459,13 @@
}
# instance in OPEN state?
instance_live() {
- [ "`dbasql dbstat`" = OPEN ]
+ output=`dbasql dbstat`
+ if [ "$output" = OPEN ]; then
+ return 0
+ else
+ ocf_log info "Instance state is not OPEN. dbstat result=$output"
+ return 1
+ fi
}
ora_cleanup() {
@@ -498,6 +523,7 @@
# try to cleanup in case of
# ORA-01081: cannot start already-running ORACLE - shut it down
first
if echo "$output" | grep ORA-01081 >/dev/null 2>&1; then
+ ocf_log info "ORA-01081 error was found. Try to cleanup
oracle. DB start output=$output"
ora_cleanup
output=`dbasql dbstart_mount`
fi
@@ -510,7 +536,7 @@
;;
*)
: error!!
- ocf_log error "Oracle $ORACLE_SID can not mount."
+ ocf_log error "Oracle $ORACLE_SID can not mount. DB
status=$status, DB start output=$output"
return $OCF_ERR_GENERIC
;;
esac
@@ -523,13 +549,16 @@
fi
output=`dbasql dbopen`
- if is_oracle_up && instance_live; then
+ if ! is_oracle_up; then
+ ocf_log err "Oracle process is not started: $output"
+ return $OCF_ERR_GENERIC
+ elif ! instance_live; then
+ ocf_log err "Oracle instance $ORACLE_SID not started: $output"
+ return $OCF_ERR_GENERIC
+ else
: cool, we are up and running
ocf_log info "Oracle instance $ORACLE_SID started: $output"
return $OCF_SUCCESS
- else
- ocf_log err "Oracle instance $ORACLE_SID not started: $output"
- return $OCF_ERR_GENERIC
fi
}
@@ -562,13 +591,21 @@
killprocs() {
local sig=$1
shift 1
- kill -$sig $* >/dev/null 2>&1
+ # Record stderr
+ kill -$sig $* >/dev/null
}
ora_kill() {
- killprocs TERM `eval $procs | awk '{print $1}'`
+ oraprocs=`eval $procs | awk '{print $1}'`
+ if [ -z "$oraprocs" ]; then
+ ocf_log debug "All oracle processes are already stopped."
+ return
+ fi
+ killprocs TERM $oraprocs
for i in 1 2 3 4 5; do
- killprocs 0 `eval $procs | awk '{print $1}'` ||
+ if [ -z "`eval $procs | awk '{print $1}'`" ]; then
+ ocf_log debug "All oracle processes are killed."
return
+ fi
sleep 5
done
killprocs KILL `eval $procs | awk '{print $1}'`
@@ -578,14 +615,16 @@
# oracle_monitor: Can the Oracle instance do anything useful?
#
oracle_monitor() {
- if is_oracle_up && instance_live
- then
- #ocf_log info "Oracle instance $ORACLE_SID is alive"
- return $OCF_SUCCESS
- else
+ if ! is_oracle_up; then
+ ocf_log info "Oracle process is down"
+ return $OCF_NOT_RUNNING
+ fi
+ if ! instance_live; then
ocf_log info "Oracle instance $ORACLE_SID is down"
return $OCF_NOT_RUNNING
fi
+ #ocf_log info "Oracle instance $ORACLE_SID is alive"
+ return $OCF_SUCCESS
}
#
# HG changeset patch
# User NAKAHIRA Kazutomo <nakahira.kazut...@oss.ntt.co.jp>
# Date 1266198714 -32400
# Node ID e619d11ea3a4514bfeb6578b3178b750c3644699
# Parent ee6e1032df4e4f24b3fb19c5f7e1c7f3d0e148a9
oralsnr: improve logging
diff -r ee6e1032df4e -r e619d11ea3a4 heartbeat/oralsnr
--- a/heartbeat/oralsnr Mon Feb 15 10:41:44 2010 +0900
+++ b/heartbeat/oralsnr Mon Feb 15 10:51:54 2010 +0900
@@ -158,11 +158,23 @@
testoraenv() {
# Let's make sure a few important things are set...
- [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] ||
+ if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then
+ ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL.
ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER."
return 1
+ fi
# and some important things are there
- [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] ||
+ if [ ! -x "$sqlplus" ]; then
+ ocf_log err "Executable sqlplus command($sqlplus) dose not
exist."
return 1
+ fi
+ if [ ! -x "$lsnrctl" ]; then
+ ocf_log err "Executable lsnrctl command($lsnrctl) dose not
exist."
+ return 1
+ fi
+ if [ ! -x "$tnsping" ]; then
+ ocf_log err "Executable tnsping command($tnsping) dose not
exist."
+ return 1
+ fi
return 0
}
@@ -247,10 +259,17 @@
# kill the listener procs
# give them 10 secs to exit cleanly (5 times 2)
oralsnr_kill() {
- killprocs TERM `eval $procs | awk '{print $1}'`
+ oraprocs=`eval $procs | awk '{print $1}'`
+ if [ -z "$oraprocs" ]; then
+ ocf_log debug "All oralsnr processes are already stopped."
+ return
+ fi
+ killprocs TERM $oraprocs
for i in 1 2 3 4 5; do
- killprocs 0 `eval $procs | awk '{print $1}'` ||
+ if [ -z "`eval $procs | awk '{print $1}'`" ]; then
+ ocf_log debug "All oralsnr processes are killed."
return
+ fi
sleep 2
done
killprocs KILL `eval $procs | awk '{print $1}'`
@@ -258,7 +277,8 @@
killprocs() {
sig=$1
shift 1
- kill -$sig $* >/dev/null 2>&1
+ # Record stderr
+ kill -$sig $* >/dev/null
}
#
@@ -269,11 +289,23 @@
[ x != "x`eval $procs`" ]
}
oralsnr_status() {
- $lsnrctl status $listener | tail -1 | grep -qs 'completed successfully'
+ output=`$lsnrctl status $listener`
+ echo "$output" | tail -1 | grep -qs 'completed successfully'
+ RET=$?
+ if [ $RET -ne 0 ]; then
+ ocf_log info "$listener status failed: $output"
+ fi
+ return $RET
}
# and does it work?
tnsping() {
- $tnsping $ORACLE_SID | tail -1 | grep -qs '^OK'
+ output=`$tnsping $ORACLE_SID`
+ echo "$output" | tail -1 | grep -qs '^OK'
+ RET=$?
+ if [ $RET -ne 0 ]; then
+ ocf_log info "$tnsping $ORACLE_SID failed: $output"
+ fi
+ return $RET
}
#
@@ -285,10 +317,10 @@
: good
#ocf_log info "Listener $listener running"
return $OCF_SUCCESS
- else
+ else
ocf_log info "Listener $listener not running"
return $OCF_NOT_RUNNING
- fi
+ fi
}
#
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/