Hi, Dejan

I fixed the spelling mistakes in the oracle/oralsnr patch
and restored the status operation (please see attached).

Also, could you please clarify which log messages in the
monitor operations are unnecessary?

Best regards,
NAKAHIRA Kazutomo

(2010/02/11 19:51), Dejan Muhamedagic wrote:
Hi Kazutomo-san,

On Wed, Feb 10, 2010 at 11:11:42AM +0900, nakah...@intellilink.co.jp wrote:
Hi, Dejan

Thank you for your comments on my patch,
and sorry for not getting back to you earlier.

I agree with all your points.
Could you commit the patches to the agents repository?

It turns out that the patches need quite a bit of work and I'm
afraid I can't afford to spend the time right now. Can you please
do that yourself: fix spelling mistakes, don't log unnecessarily
in the monitor operations, and the status operation should output
to stdout and not log.

Thanks,

Dejan

Best Regards,
NAKAHIRA Kazutomo

Quoting Dejan Muhamedagic<deja...@fastmail.fm>:

Hi Kazutomo-san,

On Fri, Jan 22, 2010 at 06:31:15PM +0900, NAKAHIRA Kazutomo wrote:
Hi, all

I improved the logging of the oracle/oralsnr RAs.
This patch aims to record the execution results of sqlplus
(and other commands) and to output a detailed failure log using ocf_log
when a problem occurs during an RA operation.

Thanks for the patch. There are a few spelling problems, but I'll
fix those. Also, the status operation must output to stdout,
that's not meant for the logs.

Cheers,

Dejan

Best Regards,
NAKAHIRA Kazutomo

--
----------------------------------------
NAKAHIRA Kazutomo
NTT DATA INTELLILINK CORPORATION
Open Source Business Unit
Software Services Integration Business Division

# HG changeset patch
# User r...@prec370b
# Date 1264145021 -32400
# Node ID 0ecef9560522601936888ce168dae5f563662402
# Parent  3024963150433960c51aa1bdccde39839efb09b7
oracle: improve logging

diff -r 302496315043 -r 0ecef9560522 heartbeat/oracle
--- a/heartbeat/oracle  Thu Jan 21 16:42:40 2010 +0100
+++ b/heartbeat/oracle  Fri Jan 22 16:23:41 2010 +0900
@@ -231,11 +231,23 @@ ora_info() {

  testoraenv() {
        #       Let's make sure a few important things are set...
-       [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] ||
+       if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then
+               ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL.
ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER."
                return 1
+       fi
        #       and some important things are there
-       [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] ||
+       if [ ! -x "$sqlplus" ]; then
+               ocf_log err "Executeble sqlplus command($sqlplus) dose not 
exist."
                return 1
+       fi
+       if [ ! -x "$lsnrctl" ]; then
+               ocf_log err "Executeble lsnrctl command($lsnrctl) dose not 
exist."
+               return 1
+       fi
+       if [ ! -x "$tnsping" ]; then
+               ocf_log err "Executeble tnsping command($tnsping) dose not 
exist."
+               return 1
+       fi
        return 0
  }

@@ -367,13 +379,20 @@ showdbstat() {
  # Part 1: Oracle
  dumpinstipc() {
        local dumpdest=`dbasql getdumpdest`
-       [ "x$dumpdest" != x -a -d "$dumpdest" ] || return 1
+       if [ "x$dumpdest" == x -o ! -d "$dumpdest" ]; then
+               ocf_log warn "dumpdest($dumpdest) is not a regular directory."
+               return 1
+       fi
        local -i fcount=`ls -rt $dumpdest | wc -l`
-       dbasql getipc>/dev/null 2>&1
+       output=`dbasql getipc`
        local lastf=`ls -rt $dumpdest | grep -v '^\.*$' | tail -1`
        local -i fcount2=`ls -rt $dumpdest | wc -l`
-       [ $((fcount+1)) -eq $fcount2 ] || return 1  # more than one file created
-       echo $dumpdest/$lastf
+       if [ $((fcount+1)) -eq $fcount2 ]; then
+               echo $dumpdest/$lastf
+       else
+               ocf_log warn "dumpinstipc failed bacause the number of output
files is wrong. before dump file count=$fcount, after dump file
count=$fcount2, getipc result=$output"
+               return 1
+       fi
  }
  parseipc() {
        local inf=$1
@@ -440,7 +459,13 @@ is_oracle_up() {
  }
  # instance in OPEN state?
  instance_live() {
-       [ "`dbasql dbstat`" = OPEN ]
+       output=`dbasql dbstat`
+       if [ "$output" = OPEN ]; then
+               return 0
+       else
+               ocf_log info "Instance state is not OPEN. dbstat result=$output"
+               return 1
+       fi
  }

  ora_cleanup() {
@@ -498,6 +523,7 @@ oracle_start() {
                # try to cleanup in case of
                # ORA-01081: cannot start already-running ORACLE - shut it down 
first
                if echo "$output" | grep ORA-01081>/dev/null 2>&1; then
+                       ocf_log info "ORA-01081 error was found. try to cleanup 
oracle.
DB start output=$output"
                        ora_cleanup
                        output=`dbasql dbstart_mount`
                fi
@@ -510,7 +536,7 @@ oracle_start() {
                ;;
        *)
                : error!!
-               ocf_log error "Oracle $ORACLE_SID can not mount."
+               ocf_log error "Oracle $ORACLE_SID can not mount. DB
status=$status, DB start output=$output"
                return $OCF_ERR_GENERIC
                ;;
        esac
@@ -523,13 +549,16 @@ oracle_start() {
        fi
        output=`dbasql dbopen`

-       if is_oracle_up&&  instance_live; then
+       if ! is_oracle_up; then
+               ocf_log err "Oracle process is not started: $output"
+               return $OCF_ERR_GENERIC
+       elif ! instance_live; then
+               ocf_log err "Oracle instance $ORACLE_SID not started: $output"
+               return $OCF_ERR_GENERIC
+       else
                : cool, we are up and running
                ocf_log info "Oracle instance $ORACLE_SID started: $output"
                return $OCF_SUCCESS
-       else
-               ocf_log err "Oracle instance $ORACLE_SID not started: $output"
-               return $OCF_ERR_GENERIC
        fi
  }

@@ -562,13 +591,21 @@ killprocs() {
  killprocs() {
        local sig=$1
        shift 1
-       kill -$sig $*>/dev/null 2>&1
+       # Record stderr
+       kill -$sig $*>/dev/null
  }
  ora_kill() {
-       killprocs TERM `eval $procs | awk '{print $1}'`
+       oraprocs=`eval $procs | awk '{print $1}'`
+       if [ -z "$oraprocs" ]; then
+               ocf_log debug "All oracle processes are already stopped."
+               return
+       fi
+       killprocs TERM $oraprocs
        for i in 1 2 3 4 5; do
-               killprocs 0 `eval $procs | awk '{print $1}'` ||
+               if [ -z "`eval $procs | awk '{print $1}'`" ]; then
+                       ocf_log debug "All oracle processes are killed."
                        return
+               fi
                sleep 5
        done
        killprocs KILL `eval $procs | awk '{print $1}'`
@@ -578,14 +615,16 @@ ora_kill() {
  # oracle_monitor: Can the Oracle instance do anything useful?
  #
  oracle_monitor() {
-       if is_oracle_up&&  instance_live
-       then
-               #ocf_log info "Oracle instance $ORACLE_SID is alive"
-               return $OCF_SUCCESS
-       else
+       if ! is_oracle_up; then
+               ocf_log info "Oracle process is down"
+               return $OCF_NOT_RUNNING
+       fi
+       if ! instance_live; then
                ocf_log info "Oracle instance $ORACLE_SID is down"
                return $OCF_NOT_RUNNING
        fi
+       #ocf_log info "Oracle instance $ORACLE_SID is alive"
+       return $OCF_SUCCESS
  }

  #
@@ -675,10 +714,10 @@ case "$1" in

    status)     if is_oracle_up
                then
-                 echo Oracle instance $ORACLE_SID is running
+                 ocf_log info "Oracle instance $ORACLE_SID is running"
                  exit $OCF_SUCCESS
                else
-                 echo Oracle instance $ORACLE_SID is stopped
+                 ocf_log info "Oracle instance $ORACLE_SID is stopped"
                  exit $OCF_NOT_RUNNING
                fi
                ;;

# HG changeset patch
# User r...@prec370b
# Date 1264145083 -32400
# Node ID b36abb1554500f4bdf33858989205d02606c609d
# Parent  0ecef9560522601936888ce168dae5f563662402
oralsnr: improve logging

diff -r 0ecef9560522 -r b36abb155450 heartbeat/oralsnr
--- a/heartbeat/oralsnr Fri Jan 22 16:23:41 2010 +0900
+++ b/heartbeat/oralsnr Fri Jan 22 16:24:43 2010 +0900
@@ -158,11 +158,23 @@ ora_info() {

  testoraenv() {
        #       Let's make sure a few important things are set...
-       [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] ||
+       if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then
+               ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL.
ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER."
                return 1
+       fi
        #       and some important things are there
-       [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] ||
+       if [ ! -x "$sqlplus" ]; then
+               ocf_log err "Executeble sqlplus command($sqlplus) dose not 
exist."
                return 1
+       fi
+       if [ ! -x "$lsnrctl" ]; then
+               ocf_log err "Executeble lsnrctl command($lsnrctl) dose not 
exist."
+               return 1
+       fi
+       if [ ! -x "$tnsping" ]; then
+               ocf_log err "Executeble tnsping command($tnsping) dose not 
exist."
+               return 1
+       fi
        return 0
  }

@@ -247,10 +259,17 @@ oralsnr_stop() {
  # kill the listener procs
  # give them 10 secs to exit cleanly (5 times 2)
  oralsnr_kill() {
-       killprocs TERM `eval $procs | awk '{print $1}'`
+       oraprocs=`eval $procs | awk '{print $1}'`
+       if [ -z "$oraprocs" ]; then
+               ocf_log debug "All oralsnr processes are already stopped."
+               return
+       fi
+       killprocs TERM $oraprocs
        for i in 1 2 3 4 5; do
-               killprocs 0 `eval $procs | awk '{print $1}'` ||
+               if [ -z "`eval $procs | awk '{print $1}'`" ]; then
+                       ocf_log debug "All oralsnr processes are killed."
                        return
+               fi
                sleep 2
        done
        killprocs KILL `eval $procs | awk '{print $1}'`
@@ -258,7 +277,8 @@ killprocs() {
  killprocs() {
        sig=$1
        shift 1
-       kill -$sig $*>/dev/null 2>&1
+       # Record stderr
+       kill -$sig $*>/dev/null
  }

  #
@@ -269,11 +289,23 @@ is_oralsnr_up() {
        [ x != "x`eval $procs`" ]
  }
  oralsnr_status() {
-       $lsnrctl status $listener | tail -1 | grep -qs 'completed successfully'
+       output=`$lsnrctl status $listener`
+       echo "$output" | tail -1 | grep -qs 'completed successfully'
+       RET=$?
+       if [ $RET -ne 0 ]; then
+               ocf_log info "$listener status failed: $output"
+       fi
+       return $RET
  }
  # and does it work?
  tnsping() {
-       $tnsping $ORACLE_SID | tail -1 | grep -qs '^OK'
+       output=`$tnsping $ORACLE_SID`
+       echo "$output" | tail -1 | grep -qs '^OK'
+       RET=$?
+       if [ $RET -ne 0 ]; then
+               ocf_log info "$tnsping $ORACLE_SID failed: $output"
+       fi
+       return $RET
  }

  #
@@ -285,10 +317,10 @@ oralsnr_monitor() {
                : good
                #ocf_log info "Listener $listener running"
                return $OCF_SUCCESS
-    else
+       else
                ocf_log info "Listener $listener not running"
                return $OCF_NOT_RUNNING
-    fi
+       fi
  }

  #
@@ -368,10 +400,10 @@ case "$1" in

    status)     if oralsnr_status
                then
-                 echo Listener $listener is running
+                 ocf_log info "Listener $listener is running"
                  exit $OCF_SUCCESS
                else
-                 echo Listener $listener is stopped
+                 ocf_log info "Listener $listener is stopped"
                  exit $OCF_NOT_RUNNING
                fi
                ;;

_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/

_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/





_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/



# HG changeset patch
# User NAKAHIRA Kazutomo <nakahira.kazut...@oss.ntt.co.jp>
# Date 1266198104 -32400
# Node ID ee6e1032df4e4f24b3fb19c5f7e1c7f3d0e148a9
# Parent  5737f981ade0c07d78da4a74515f13d190f9833e
oracle: improve logging

diff -r 5737f981ade0 -r ee6e1032df4e heartbeat/oracle
--- a/heartbeat/oracle  Wed Feb 10 11:28:34 2010 +0100
+++ b/heartbeat/oracle  Mon Feb 15 10:41:44 2010 +0900
@@ -231,11 +231,23 @@
 
 testoraenv() {
        #       Let's make sure a few important things are set...
-       [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] ||
+       if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then
+               ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL. 
ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER."
                return 1
+       fi
        #       and some important things are there
-       [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] ||
+       if [ ! -x "$sqlplus" ]; then
+               ocf_log err "Executable sqlplus command($sqlplus) dose not 
exist."
                return 1
+       fi
+       if [ ! -x "$lsnrctl" ]; then
+               ocf_log err "Executable lsnrctl command($lsnrctl) dose not 
exist."
+               return 1
+       fi
+       if [ ! -x "$tnsping" ]; then
+               ocf_log err "Executable tnsping command($tnsping) dose not 
exist."
+               return 1
+       fi
        return 0
 }
 
@@ -367,13 +379,20 @@
 # Part 1: Oracle
 dumpinstipc() {
        local dumpdest=`dbasql getdumpdest`
-       [ "x$dumpdest" != x -a -d "$dumpdest" ] || return 1
+       if [ "x$dumpdest" == x -o ! -d "$dumpdest" ]; then
+               ocf_log warn "dumpdest($dumpdest) is not a regular directory."
+               return 1
+       fi
        local -i fcount=`ls -rt $dumpdest | wc -l`
-       dbasql getipc >/dev/null 2>&1
+       output=`dbasql getipc`
        local lastf=`ls -rt $dumpdest | grep -v '^\.*$' | tail -1`
        local -i fcount2=`ls -rt $dumpdest | wc -l`
-       [ $((fcount+1)) -eq $fcount2 ] || return 1  # more than one file created
-       echo $dumpdest/$lastf
+       if [ $((fcount+1)) -eq $fcount2 ]; then
+               echo $dumpdest/$lastf
+       else
+               ocf_log warn "dumpinstipc failed because the number of output 
files is wrong. File count before the dump =$fcount, file count after the 
dump=$fcount2, getipc result=$output"
+               return 1
+       fi
 }
 parseipc() {
        local inf=$1
@@ -440,7 +459,13 @@
 }
 # instance in OPEN state?
 instance_live() {
-       [ "`dbasql dbstat`" = OPEN ]
+       output=`dbasql dbstat`
+       if [ "$output" = OPEN ]; then
+               return 0
+       else
+               ocf_log info "Instance state is not OPEN. dbstat result=$output"
+               return 1
+       fi
 }
 
 ora_cleanup() {
@@ -498,6 +523,7 @@
                # try to cleanup in case of
                # ORA-01081: cannot start already-running ORACLE - shut it down 
first
                if echo "$output" | grep ORA-01081 >/dev/null 2>&1; then
+                       ocf_log info "ORA-01081 error was found. Try to cleanup 
oracle. DB start output=$output"
                        ora_cleanup
                        output=`dbasql dbstart_mount`
                fi
@@ -510,7 +536,7 @@
                ;;
        *)
                : error!!
-               ocf_log error "Oracle $ORACLE_SID can not mount."
+               ocf_log error "Oracle $ORACLE_SID can not mount. DB 
status=$status, DB start output=$output"
                return $OCF_ERR_GENERIC
                ;;
        esac
@@ -523,13 +549,16 @@
        fi
        output=`dbasql dbopen`
 
-       if is_oracle_up && instance_live; then
+       if ! is_oracle_up; then
+               ocf_log err "Oracle process is not started: $output"
+               return $OCF_ERR_GENERIC
+       elif ! instance_live; then
+               ocf_log err "Oracle instance $ORACLE_SID not started: $output"
+               return $OCF_ERR_GENERIC
+       else
                : cool, we are up and running
                ocf_log info "Oracle instance $ORACLE_SID started: $output"
                return $OCF_SUCCESS
-       else
-               ocf_log err "Oracle instance $ORACLE_SID not started: $output"
-               return $OCF_ERR_GENERIC
        fi
 }
 
@@ -562,13 +591,21 @@
 killprocs() {
        local sig=$1
        shift 1
-       kill -$sig $* >/dev/null 2>&1
+       # Record stderr
+       kill -$sig $* >/dev/null
 }
 ora_kill() {
-       killprocs TERM `eval $procs | awk '{print $1}'`
+       oraprocs=`eval $procs | awk '{print $1}'`
+       if [ -z "$oraprocs" ]; then
+               ocf_log debug "All oracle processes are already stopped."
+               return
+       fi
+       killprocs TERM $oraprocs
        for i in 1 2 3 4 5; do
-               killprocs 0 `eval $procs | awk '{print $1}'` ||
+               if [ -z "`eval $procs | awk '{print $1}'`" ]; then
+                       ocf_log debug "All oracle processes are killed."
                        return
+               fi
                sleep 5
        done
        killprocs KILL `eval $procs | awk '{print $1}'`
@@ -578,14 +615,16 @@
 # oracle_monitor: Can the Oracle instance do anything useful?
 #
 oracle_monitor() {
-       if is_oracle_up && instance_live
-       then
-               #ocf_log info "Oracle instance $ORACLE_SID is alive"
-               return $OCF_SUCCESS
-       else
+       if ! is_oracle_up; then
+               ocf_log info "Oracle process is down"
+               return $OCF_NOT_RUNNING
+       fi
+       if ! instance_live; then
                ocf_log info "Oracle instance $ORACLE_SID is down"
                return $OCF_NOT_RUNNING
        fi
+       #ocf_log info "Oracle instance $ORACLE_SID is alive"
+       return $OCF_SUCCESS
 }
 
 #
# HG changeset patch
# User NAKAHIRA Kazutomo <nakahira.kazut...@oss.ntt.co.jp>
# Date 1266198714 -32400
# Node ID e619d11ea3a4514bfeb6578b3178b750c3644699
# Parent  ee6e1032df4e4f24b3fb19c5f7e1c7f3d0e148a9
oralsnr: improve logging

diff -r ee6e1032df4e -r e619d11ea3a4 heartbeat/oralsnr
--- a/heartbeat/oralsnr Mon Feb 15 10:41:44 2010 +0900
+++ b/heartbeat/oralsnr Mon Feb 15 10:51:54 2010 +0900
@@ -158,11 +158,23 @@
 
 testoraenv() {
        #       Let's make sure a few important things are set...
-       [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] ||
+       if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then
+               ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL. 
ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER."
                return 1
+       fi
        #       and some important things are there
-       [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] ||
+       if [ ! -x "$sqlplus" ]; then
+               ocf_log err "Executable sqlplus command($sqlplus) dose not 
exist."
                return 1
+       fi
+       if [ ! -x "$lsnrctl" ]; then
+               ocf_log err "Executable lsnrctl command($lsnrctl) dose not 
exist."
+               return 1
+       fi
+       if [ ! -x "$tnsping" ]; then
+               ocf_log err "Executable tnsping command($tnsping) dose not 
exist."
+               return 1
+       fi
        return 0
 }
 
@@ -247,10 +259,17 @@
 # kill the listener procs
 # give them 10 secs to exit cleanly (5 times 2)
 oralsnr_kill() {
-       killprocs TERM `eval $procs | awk '{print $1}'`
+       oraprocs=`eval $procs | awk '{print $1}'`
+       if [ -z "$oraprocs" ]; then
+               ocf_log debug "All oralsnr processes are already stopped."
+               return
+       fi
+       killprocs TERM $oraprocs
        for i in 1 2 3 4 5; do
-               killprocs 0 `eval $procs | awk '{print $1}'` ||
+               if [ -z "`eval $procs | awk '{print $1}'`" ]; then
+                       ocf_log debug "All oralsnr processes are killed."
                        return
+               fi
                sleep 2
        done
        killprocs KILL `eval $procs | awk '{print $1}'`
@@ -258,7 +277,8 @@
 killprocs() {
        sig=$1
        shift 1
-       kill -$sig $* >/dev/null 2>&1
+       # Record stderr
+       kill -$sig $* >/dev/null
 }
 
 #
@@ -269,11 +289,23 @@
        [ x != "x`eval $procs`" ]
 }
 oralsnr_status() {
-       $lsnrctl status $listener | tail -1 | grep -qs 'completed successfully'
+       output=`$lsnrctl status $listener`
+       echo "$output" | tail -1 | grep -qs 'completed successfully'
+       RET=$?
+       if [ $RET -ne 0 ]; then
+               ocf_log info "$listener status failed: $output"
+       fi
+       return $RET
 }
 # and does it work?
 tnsping() {
-       $tnsping $ORACLE_SID | tail -1 | grep -qs '^OK'
+       output=`$tnsping $ORACLE_SID`
+       echo "$output" | tail -1 | grep -qs '^OK'
+       RET=$?
+       if [ $RET -ne 0 ]; then
+               ocf_log info "$tnsping $ORACLE_SID failed: $output"
+       fi
+       return $RET
 }
 
 #
@@ -285,10 +317,10 @@
                : good
                #ocf_log info "Listener $listener running"
                return $OCF_SUCCESS
-    else
+       else
                ocf_log info "Listener $listener not running"
                return $OCF_NOT_RUNNING
-    fi
+       fi
 }
 
 #
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/

Reply via email to