Hi Dejan, Thank you for your comments on my patch, and sorry for not getting back to you earlier.
I agree to all your points. Could you commit patches to the agents repository? Best Regards, NAKAHIRA Kazutomo Quoting Dejan Muhamedagic <deja...@fastmail.fm>: > Hi Kazutomo-san, > > On Fri, Jan 22, 2010 at 06:31:15PM +0900, NAKAHIRA Kazutomo wrote: >> Hi, all >> >> I improved logging of the oracle/oralsnr RA. >> This patch has aimed to record the execution result of the sqlplus >> (and other commands) and output a detailed failure log using ocf_log >> when some problems occurred in RA operation. > > Thanks for the patch. There are a few spelling problems, but I'll > fix those. Also, the status operation must output to stdout, > that's not meant for the logs. > > Cheers, > > Dejan > >> Best Regards, >> NAKAHIRA Kazutomo >> >> -- >> ---------------------------------------- >> NAKAHIRA Kazutomo >> NTT DATA INTELLILINK CORPORATION >> Open Source Business Unit >> Software Services Integration Business Division > >> # HG changeset patch >> # User r...@prec370b >> # Date 1264145021 -32400 >> # Node ID 0ecef9560522601936888ce168dae5f563662402 >> # Parent 3024963150433960c51aa1bdccde39839efb09b7 >> oracle: improve logging >> >> diff -r 302496315043 -r 0ecef9560522 heartbeat/oracle >> --- a/heartbeat/oracle Thu Jan 21 16:42:40 2010 +0100 >> +++ b/heartbeat/oracle Fri Jan 22 16:23:41 2010 +0900 >> @@ -231,11 +231,23 @@ ora_info() { >> >> testoraenv() { >> # Let's make sure a few important things are set... >> - [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] || >> + if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then >> + ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL. >> ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER." >> return 1 >> + fi >> # and some important things are there >> - [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] || >> + if [ ! -x "$sqlplus" ]; then >> + ocf_log err "Executeble sqlplus command($sqlplus) dose not >> exist." >> return 1 >> + fi >> + if [ ! 
-x "$lsnrctl" ]; then >> + ocf_log err "Executeble lsnrctl command($lsnrctl) dose not >> exist." >> + return 1 >> + fi >> + if [ ! -x "$tnsping" ]; then >> + ocf_log err "Executeble tnsping command($tnsping) dose not >> exist." >> + return 1 >> + fi >> return 0 >> } >> >> @@ -367,13 +379,20 @@ showdbstat() { >> # Part 1: Oracle >> dumpinstipc() { >> local dumpdest=`dbasql getdumpdest` >> - [ "x$dumpdest" != x -a -d "$dumpdest" ] || return 1 >> + if [ "x$dumpdest" == x -o ! -d "$dumpdest" ]; then >> + ocf_log warn "dumpdest($dumpdest) is not a regular directory." >> + return 1 >> + fi >> local -i fcount=`ls -rt $dumpdest | wc -l` >> - dbasql getipc >/dev/null 2>&1 >> + output=`dbasql getipc` >> local lastf=`ls -rt $dumpdest | grep -v '^\.*$' | tail -1` >> local -i fcount2=`ls -rt $dumpdest | wc -l` >> - [ $((fcount+1)) -eq $fcount2 ] || return 1 # more than one file created >> - echo $dumpdest/$lastf >> + if [ $((fcount+1)) -eq $fcount2 ]; then >> + echo $dumpdest/$lastf >> + else >> + ocf_log warn "dumpinstipc failed bacause the number of output >> files is wrong. before dump file count=$fcount, after dump file >> count=$fcount2, getipc result=$output" >> + return 1 >> + fi >> } >> parseipc() { >> local inf=$1 >> @@ -440,7 +459,13 @@ is_oracle_up() { >> } >> # instance in OPEN state? >> instance_live() { >> - [ "`dbasql dbstat`" = OPEN ] >> + output=`dbasql dbstat` >> + if [ "$output" = OPEN ]; then >> + return 0 >> + else >> + ocf_log info "Instance state is not OPEN. dbstat result=$output" >> + return 1 >> + fi >> } >> >> ora_cleanup() { >> @@ -498,6 +523,7 @@ oracle_start() { >> # try to cleanup in case of >> # ORA-01081: cannot start already-running ORACLE - shut it down >> first >> if echo "$output" | grep ORA-01081 >/dev/null 2>&1; then >> + ocf_log info "ORA-01081 error was found. try to cleanup >> oracle. >> DB start output=$output" >> ora_cleanup >> output=`dbasql dbstart_mount` >> fi >> @@ -510,7 +536,7 @@ oracle_start() { >> ;; >> *) >> : error!! 
>> - ocf_log error "Oracle $ORACLE_SID can not mount." >> + ocf_log error "Oracle $ORACLE_SID can not mount. DB >> status=$status, DB start output=$output" >> return $OCF_ERR_GENERIC >> ;; >> esac >> @@ -523,13 +549,16 @@ oracle_start() { >> fi >> output=`dbasql dbopen` >> >> - if is_oracle_up && instance_live; then >> + if ! is_oracle_up; then >> + ocf_log err "Oracle process is not started: $output" >> + return $OCF_ERR_GENERIC >> + elif ! instance_live; then >> + ocf_log err "Oracle instance $ORACLE_SID not started: $output" >> + return $OCF_ERR_GENERIC >> + else >> : cool, we are up and running >> ocf_log info "Oracle instance $ORACLE_SID started: $output" >> return $OCF_SUCCESS >> - else >> - ocf_log err "Oracle instance $ORACLE_SID not started: $output" >> - return $OCF_ERR_GENERIC >> fi >> } >> >> @@ -562,13 +591,21 @@ killprocs() { >> killprocs() { >> local sig=$1 >> shift 1 >> - kill -$sig $* >/dev/null 2>&1 >> + # Record stderr >> + kill -$sig $* >/dev/null >> } >> ora_kill() { >> - killprocs TERM `eval $procs | awk '{print $1}'` >> + oraprocs=`eval $procs | awk '{print $1}'` >> + if [ -z "$oraprocs" ]; then >> + ocf_log debug "All oracle processes are already stopped." >> + return >> + fi >> + killprocs TERM $oraprocs >> for i in 1 2 3 4 5; do >> - killprocs 0 `eval $procs | awk '{print $1}'` || >> + if [ -z "`eval $procs | awk '{print $1}'`" ]; then >> + ocf_log debug "All oracle processes are killed." >> return >> + fi >> sleep 5 >> done >> killprocs KILL `eval $procs | awk '{print $1}'` >> @@ -578,14 +615,16 @@ ora_kill() { >> # oracle_monitor: Can the Oracle instance do anything useful? >> # >> oracle_monitor() { >> - if is_oracle_up && instance_live >> - then >> - #ocf_log info "Oracle instance $ORACLE_SID is alive" >> - return $OCF_SUCCESS >> - else >> + if ! is_oracle_up; then >> + ocf_log info "Oracle process is down" >> + return $OCF_NOT_RUNNING >> + fi >> + if ! 
instance_live; then >> ocf_log info "Oracle instance $ORACLE_SID is down" >> return $OCF_NOT_RUNNING >> fi >> + #ocf_log info "Oracle instance $ORACLE_SID is alive" >> + return $OCF_SUCCESS >> } >> >> # >> @@ -675,10 +714,10 @@ case "$1" in >> >> status) if is_oracle_up >> then >> - echo Oracle instance $ORACLE_SID is running >> + ocf_log info "Oracle instance $ORACLE_SID is running" >> exit $OCF_SUCCESS >> else >> - echo Oracle instance $ORACLE_SID is stopped >> + ocf_log info "Oracle instance $ORACLE_SID is stopped" >> exit $OCF_NOT_RUNNING >> fi >> ;; > >> # HG changeset patch >> # User r...@prec370b >> # Date 1264145083 -32400 >> # Node ID b36abb1554500f4bdf33858989205d02606c609d >> # Parent 0ecef9560522601936888ce168dae5f563662402 >> oralsnr: improve logging >> >> diff -r 0ecef9560522 -r b36abb155450 heartbeat/oralsnr >> --- a/heartbeat/oralsnr Fri Jan 22 16:23:41 2010 +0900 >> +++ b/heartbeat/oralsnr Fri Jan 22 16:24:43 2010 +0900 >> @@ -158,11 +158,23 @@ ora_info() { >> >> testoraenv() { >> # Let's make sure a few important things are set... >> - [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] || >> + if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then >> + ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL. >> ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER." >> return 1 >> + fi >> # and some important things are there >> - [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] || >> + if [ ! -x "$sqlplus" ]; then >> + ocf_log err "Executeble sqlplus command($sqlplus) dose not >> exist." >> return 1 >> + fi >> + if [ ! -x "$lsnrctl" ]; then >> + ocf_log err "Executeble lsnrctl command($lsnrctl) dose not >> exist." >> + return 1 >> + fi >> + if [ ! -x "$tnsping" ]; then >> + ocf_log err "Executeble tnsping command($tnsping) dose not >> exist." 
>> + return 1 >> + fi >> return 0 >> } >> >> @@ -247,10 +259,17 @@ oralsnr_stop() { >> # kill the listener procs >> # give them 10 secs to exit cleanly (5 times 2) >> oralsnr_kill() { >> - killprocs TERM `eval $procs | awk '{print $1}'` >> + oraprocs=`eval $procs | awk '{print $1}'` >> + if [ -z "$oraprocs" ]; then >> + ocf_log debug "All oralsnr processes are already stopped." >> + return >> + fi >> + killprocs TERM $oraprocs >> for i in 1 2 3 4 5; do >> - killprocs 0 `eval $procs | awk '{print $1}'` || >> + if [ -z "`eval $procs | awk '{print $1}'`" ]; then >> + ocf_log debug "All oralsnr processes are killed." >> return >> + fi >> sleep 2 >> done >> killprocs KILL `eval $procs | awk '{print $1}'` >> @@ -258,7 +277,8 @@ killprocs() { >> killprocs() { >> sig=$1 >> shift 1 >> - kill -$sig $* >/dev/null 2>&1 >> + # Record stderr >> + kill -$sig $* >/dev/null >> } >> >> # >> @@ -269,11 +289,23 @@ is_oralsnr_up() { >> [ x != "x`eval $procs`" ] >> } >> oralsnr_status() { >> - $lsnrctl status $listener | tail -1 | grep -qs 'completed successfully' >> + output=`$lsnrctl status $listener` >> + echo "$output" | tail -1 | grep -qs 'completed successfully' >> + RET=$? >> + if [ $RET -ne 0 ]; then >> + ocf_log info "$listener status failed: $output" >> + fi >> + return $RET >> } >> # and does it work? >> tnsping() { >> - $tnsping $ORACLE_SID | tail -1 | grep -qs '^OK' >> + output=`$tnsping $ORACLE_SID` >> + echo "$output" | tail -1 | grep -qs '^OK' >> + RET=$? 
>> + if [ $RET -ne 0 ]; then >> + ocf_log info "$tnsping $ORACLE_SID failed: $output" >> + fi >> + return $RET >> } >> >> # >> @@ -285,10 +317,10 @@ oralsnr_monitor() { >> : good >> #ocf_log info "Listener $listener running" >> return $OCF_SUCCESS >> - else >> + else >> ocf_log info "Listener $listener not running" >> return $OCF_NOT_RUNNING >> - fi >> + fi >> } >> >> # >> @@ -368,10 +400,10 @@ case "$1" in >> >> status) if oralsnr_status >> then >> - echo Listener $listener is running >> + ocf_log info "Listener $listener is running" >> exit $OCF_SUCCESS >> else >> - echo Listener $listener is stopped >> + ocf_log info "Listener $listener is stopped" >> exit $OCF_NOT_RUNNING >> fi >> ;; > >> _______________________________________________________ >> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org >> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev >> Home Page: http://linux-ha.org/ > > _______________________________________________________ > Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org > http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev > Home Page: http://linux-ha.org/ > > _______________________________________________________ Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev Home Page: http://linux-ha.org/