Hi Kazutomo-san, On Fri, Jan 22, 2010 at 06:31:15PM +0900, NAKAHIRA Kazutomo wrote: > Hi, all > > I improved the logging of the oracle/oralsnr RA. > This patch aims to record the execution results of sqlplus > (and other commands) and to output a detailed failure log using ocf_log > when a problem occurs during an RA operation.
Thanks for the patch. There are a few spelling problems, but I'll fix those. Also, the status operation must output to stdout, that's not meant for the logs. Cheers, Dejan > Best Regards, > NAKAHIRA Kazutomo > > -- > ---------------------------------------- > NAKAHIRA Kazutomo > NTT DATA INTELLILINK CORPORATION > Open Source Business Unit > Software Services Integration Business Division > # HG changeset patch > # User r...@prec370b > # Date 1264145021 -32400 > # Node ID 0ecef9560522601936888ce168dae5f563662402 > # Parent 3024963150433960c51aa1bdccde39839efb09b7 > oracle: improve logging > > diff -r 302496315043 -r 0ecef9560522 heartbeat/oracle > --- a/heartbeat/oracle Thu Jan 21 16:42:40 2010 +0100 > +++ b/heartbeat/oracle Fri Jan 22 16:23:41 2010 +0900 > @@ -231,11 +231,23 @@ ora_info() { > > testoraenv() { > # Let's make sure a few important things are set... > - [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] || > + if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then > + ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL. > ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER." > return 1 > + fi > # and some important things are there > - [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] || > + if [ ! -x "$sqlplus" ]; then > + ocf_log err "Executeble sqlplus command($sqlplus) dose not > exist." > return 1 > + fi > + if [ ! -x "$lsnrctl" ]; then > + ocf_log err "Executeble lsnrctl command($lsnrctl) dose not > exist." > + return 1 > + fi > + if [ ! -x "$tnsping" ]; then > + ocf_log err "Executeble tnsping command($tnsping) dose not > exist." > + return 1 > + fi > return 0 > } > > @@ -367,13 +379,20 @@ showdbstat() { > # Part 1: Oracle > dumpinstipc() { > local dumpdest=`dbasql getdumpdest` > - [ "x$dumpdest" != x -a -d "$dumpdest" ] || return 1 > + if [ "x$dumpdest" == x -o ! -d "$dumpdest" ]; then > + ocf_log warn "dumpdest($dumpdest) is not a regular directory." 
> + return 1 > + fi > local -i fcount=`ls -rt $dumpdest | wc -l` > - dbasql getipc >/dev/null 2>&1 > + output=`dbasql getipc` > local lastf=`ls -rt $dumpdest | grep -v '^\.*$' | tail -1` > local -i fcount2=`ls -rt $dumpdest | wc -l` > - [ $((fcount+1)) -eq $fcount2 ] || return 1 # more than one file created > - echo $dumpdest/$lastf > + if [ $((fcount+1)) -eq $fcount2 ]; then > + echo $dumpdest/$lastf > + else > + ocf_log warn "dumpinstipc failed bacause the number of output > files is wrong. before dump file count=$fcount, after dump file > count=$fcount2, getipc result=$output" > + return 1 > + fi > } > parseipc() { > local inf=$1 > @@ -440,7 +459,13 @@ is_oracle_up() { > } > # instance in OPEN state? > instance_live() { > - [ "`dbasql dbstat`" = OPEN ] > + output=`dbasql dbstat` > + if [ "$output" = OPEN ]; then > + return 0 > + else > + ocf_log info "Instance state is not OPEN. dbstat result=$output" > + return 1 > + fi > } > > ora_cleanup() { > @@ -498,6 +523,7 @@ oracle_start() { > # try to cleanup in case of > # ORA-01081: cannot start already-running ORACLE - shut it down > first > if echo "$output" | grep ORA-01081 >/dev/null 2>&1; then > + ocf_log info "ORA-01081 error was found. try to cleanup > oracle. DB start output=$output" > ora_cleanup > output=`dbasql dbstart_mount` > fi > @@ -510,7 +536,7 @@ oracle_start() { > ;; > *) > : error!! > - ocf_log error "Oracle $ORACLE_SID can not mount." > + ocf_log error "Oracle $ORACLE_SID can not mount. DB > status=$status, DB start output=$output" > return $OCF_ERR_GENERIC > ;; > esac > @@ -523,13 +549,16 @@ oracle_start() { > fi > output=`dbasql dbopen` > > - if is_oracle_up && instance_live; then > + if ! is_oracle_up; then > + ocf_log err "Oracle process is not started: $output" > + return $OCF_ERR_GENERIC > + elif ! 
instance_live; then > + ocf_log err "Oracle instance $ORACLE_SID not started: $output" > + return $OCF_ERR_GENERIC > + else > : cool, we are up and running > ocf_log info "Oracle instance $ORACLE_SID started: $output" > return $OCF_SUCCESS > - else > - ocf_log err "Oracle instance $ORACLE_SID not started: $output" > - return $OCF_ERR_GENERIC > fi > } > > @@ -562,13 +591,21 @@ killprocs() { > killprocs() { > local sig=$1 > shift 1 > - kill -$sig $* >/dev/null 2>&1 > + # Record stderr > + kill -$sig $* >/dev/null > } > ora_kill() { > - killprocs TERM `eval $procs | awk '{print $1}'` > + oraprocs=`eval $procs | awk '{print $1}'` > + if [ -z "$oraprocs" ]; then > + ocf_log debug "All oracle processes are already stopped." > + return > + fi > + killprocs TERM $oraprocs > for i in 1 2 3 4 5; do > - killprocs 0 `eval $procs | awk '{print $1}'` || > + if [ -z "`eval $procs | awk '{print $1}'`" ]; then > + ocf_log debug "All oracle processes are killed." > return > + fi > sleep 5 > done > killprocs KILL `eval $procs | awk '{print $1}'` > @@ -578,14 +615,16 @@ ora_kill() { > # oracle_monitor: Can the Oracle instance do anything useful? > # > oracle_monitor() { > - if is_oracle_up && instance_live > - then > - #ocf_log info "Oracle instance $ORACLE_SID is alive" > - return $OCF_SUCCESS > - else > + if ! is_oracle_up; then > + ocf_log info "Oracle process is down" > + return $OCF_NOT_RUNNING > + fi > + if ! 
instance_live; then > ocf_log info "Oracle instance $ORACLE_SID is down" > return $OCF_NOT_RUNNING > fi > + #ocf_log info "Oracle instance $ORACLE_SID is alive" > + return $OCF_SUCCESS > } > > # > @@ -675,10 +714,10 @@ case "$1" in > > status) if is_oracle_up > then > - echo Oracle instance $ORACLE_SID is running > + ocf_log info "Oracle instance $ORACLE_SID is running" > exit $OCF_SUCCESS > else > - echo Oracle instance $ORACLE_SID is stopped > + ocf_log info "Oracle instance $ORACLE_SID is stopped" > exit $OCF_NOT_RUNNING > fi > ;; > # HG changeset patch > # User r...@prec370b > # Date 1264145083 -32400 > # Node ID b36abb1554500f4bdf33858989205d02606c609d > # Parent 0ecef9560522601936888ce168dae5f563662402 > oralsnr: improve logging > > diff -r 0ecef9560522 -r b36abb155450 heartbeat/oralsnr > --- a/heartbeat/oralsnr Fri Jan 22 16:23:41 2010 +0900 > +++ b/heartbeat/oralsnr Fri Jan 22 16:24:43 2010 +0900 > @@ -158,11 +158,23 @@ ora_info() { > > testoraenv() { > # Let's make sure a few important things are set... > - [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] || > + if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then > + ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL. > ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER." > return 1 > + fi > # and some important things are there > - [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] || > + if [ ! -x "$sqlplus" ]; then > + ocf_log err "Executeble sqlplus command($sqlplus) dose not > exist." > return 1 > + fi > + if [ ! -x "$lsnrctl" ]; then > + ocf_log err "Executeble lsnrctl command($lsnrctl) dose not > exist." > + return 1 > + fi > + if [ ! -x "$tnsping" ]; then > + ocf_log err "Executeble tnsping command($tnsping) dose not > exist." 
> + return 1 > + fi > return 0 > } > > @@ -247,10 +259,17 @@ oralsnr_stop() { > # kill the listener procs > # give them 10 secs to exit cleanly (5 times 2) > oralsnr_kill() { > - killprocs TERM `eval $procs | awk '{print $1}'` > + oraprocs=`eval $procs | awk '{print $1}'` > + if [ -z "$oraprocs" ]; then > + ocf_log debug "All oralsnr processes are already stopped." > + return > + fi > + killprocs TERM $oraprocs > for i in 1 2 3 4 5; do > - killprocs 0 `eval $procs | awk '{print $1}'` || > + if [ -z "`eval $procs | awk '{print $1}'`" ]; then > + ocf_log debug "All oralsnr processes are killed." > return > + fi > sleep 2 > done > killprocs KILL `eval $procs | awk '{print $1}'` > @@ -258,7 +277,8 @@ killprocs() { > killprocs() { > sig=$1 > shift 1 > - kill -$sig $* >/dev/null 2>&1 > + # Record stderr > + kill -$sig $* >/dev/null > } > > # > @@ -269,11 +289,23 @@ is_oralsnr_up() { > [ x != "x`eval $procs`" ] > } > oralsnr_status() { > - $lsnrctl status $listener | tail -1 | grep -qs 'completed successfully' > + output=`$lsnrctl status $listener` > + echo "$output" | tail -1 | grep -qs 'completed successfully' > + RET=$? > + if [ $RET -ne 0 ]; then > + ocf_log info "$listener status failed: $output" > + fi > + return $RET > } > # and does it work? > tnsping() { > - $tnsping $ORACLE_SID | tail -1 | grep -qs '^OK' > + output=`$tnsping $ORACLE_SID` > + echo "$output" | tail -1 | grep -qs '^OK' > + RET=$? 
> + if [ $RET -ne 0 ]; then > + ocf_log info "$tnsping $ORACLE_SID failed: $output" > + fi > + return $RET > } > > # > @@ -285,10 +317,10 @@ oralsnr_monitor() { > : good > #ocf_log info "Listener $listener running" > return $OCF_SUCCESS > - else > + else > ocf_log info "Listener $listener not running" > return $OCF_NOT_RUNNING > - fi > + fi > } > > # > @@ -368,10 +400,10 @@ case "$1" in > > status) if oralsnr_status > then > - echo Listener $listener is running > + ocf_log info "Listener $listener is running" > exit $OCF_SUCCESS > else > - echo Listener $listener is stopped > + ocf_log info "Listener $listener is stopped" > exit $OCF_NOT_RUNNING > fi > ;; > _______________________________________________________ > Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org > http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev > Home Page: http://linux-ha.org/ _______________________________________________________ Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev Home Page: http://linux-ha.org/