Hi Kazutomo-san,

On Fri, Jan 22, 2010 at 06:31:15PM +0900, NAKAHIRA Kazutomo wrote:
> Hi, all
> 
> I improved logging of the oracle/oralsnr RA.
> This patch has aimed to record the execution result of the sqlplus
> (and other commands) and output a detailed failure log using ocf_log
> when some problems occurred in RA operation.

Thanks for the patch. There are a few spelling problems, but I'll
fix those. Also, the status operation must output to stdout,
that's not meant for the logs.

Cheers,

Dejan

> Best Regards,
> NAKAHIRA Kazutomo
> 
> -- 
> ----------------------------------------
> NAKAHIRA Kazutomo
> NTT DATA INTELLILINK CORPORATION
> Open Source Business Unit
> Software Services Integration Business Division

> # HG changeset patch
> # User r...@prec370b
> # Date 1264145021 -32400
> # Node ID 0ecef9560522601936888ce168dae5f563662402
> # Parent  3024963150433960c51aa1bdccde39839efb09b7
> oracle: improve logging
> 
> diff -r 302496315043 -r 0ecef9560522 heartbeat/oracle
> --- a/heartbeat/oracle        Thu Jan 21 16:42:40 2010 +0100
> +++ b/heartbeat/oracle        Fri Jan 22 16:23:41 2010 +0900
> @@ -231,11 +231,23 @@ ora_info() {
>  
>  testoraenv() {
>       #       Let's make sure a few important things are set...
> -     [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] ||
> +     if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then
> +             ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL. 
> ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER."
>               return 1
> +     fi
>       #       and some important things are there
> -     [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] ||
> +     if [ ! -x "$sqlplus" ]; then
> +             ocf_log err "Executeble sqlplus command($sqlplus) dose not 
> exist."
>               return 1
> +     fi
> +     if [ ! -x "$lsnrctl" ]; then
> +             ocf_log err "Executeble lsnrctl command($lsnrctl) dose not 
> exist."
> +             return 1
> +     fi
> +     if [ ! -x "$tnsping" ]; then
> +             ocf_log err "Executeble tnsping command($tnsping) dose not 
> exist."
> +             return 1
> +     fi
>       return 0
>  }
>  
> @@ -367,13 +379,20 @@ showdbstat() {
>  # Part 1: Oracle
>  dumpinstipc() {
>       local dumpdest=`dbasql getdumpdest`
> -     [ "x$dumpdest" != x -a -d "$dumpdest" ] || return 1
> +     if [ "x$dumpdest" == x -o ! -d "$dumpdest" ]; then
> +             ocf_log warn "dumpdest($dumpdest) is not a regular directory."
> +             return 1
> +     fi
>       local -i fcount=`ls -rt $dumpdest | wc -l`
> -     dbasql getipc >/dev/null 2>&1
> +     output=`dbasql getipc`
>       local lastf=`ls -rt $dumpdest | grep -v '^\.*$' | tail -1`
>       local -i fcount2=`ls -rt $dumpdest | wc -l`
> -     [ $((fcount+1)) -eq $fcount2 ] || return 1  # more than one file created
> -     echo $dumpdest/$lastf
> +     if [ $((fcount+1)) -eq $fcount2 ]; then
> +             echo $dumpdest/$lastf
> +     else
> +             ocf_log warn "dumpinstipc failed bacause the number of output 
> files is wrong. before dump file count=$fcount, after dump file 
> count=$fcount2, getipc result=$output"
> +             return 1
> +     fi
>  }
>  parseipc() {
>       local inf=$1
> @@ -440,7 +459,13 @@ is_oracle_up() {
>  }
>  # instance in OPEN state?
>  instance_live() {
> -     [ "`dbasql dbstat`" = OPEN ]
> +     output=`dbasql dbstat`
> +     if [ "$output" = OPEN ]; then
> +             return 0
> +     else
> +             ocf_log info "Instance state is not OPEN. dbstat result=$output"
> +             return 1
> +     fi
>  }
>  
>  ora_cleanup() {
> @@ -498,6 +523,7 @@ oracle_start() {
>               # try to cleanup in case of
>               # ORA-01081: cannot start already-running ORACLE - shut it down 
> first
>               if echo "$output" | grep ORA-01081 >/dev/null 2>&1; then
> +                     ocf_log info "ORA-01081 error was found. try to cleanup 
> oracle. DB start output=$output"
>                       ora_cleanup
>                       output=`dbasql dbstart_mount`
>               fi
> @@ -510,7 +536,7 @@ oracle_start() {
>               ;;
>       *)
>               : error!!
> -             ocf_log error "Oracle $ORACLE_SID can not mount."
> +             ocf_log error "Oracle $ORACLE_SID can not mount. DB 
> status=$status, DB start output=$output"
>               return $OCF_ERR_GENERIC
>               ;;
>       esac
> @@ -523,13 +549,16 @@ oracle_start() {
>       fi
>       output=`dbasql dbopen`
>  
> -     if is_oracle_up && instance_live; then
> +     if ! is_oracle_up; then
> +             ocf_log err "Oracle process is not started: $output"
> +             return $OCF_ERR_GENERIC
> +     elif ! instance_live; then
> +             ocf_log err "Oracle instance $ORACLE_SID not started: $output"
> +             return $OCF_ERR_GENERIC
> +     else
>               : cool, we are up and running
>               ocf_log info "Oracle instance $ORACLE_SID started: $output"
>               return $OCF_SUCCESS
> -     else
> -             ocf_log err "Oracle instance $ORACLE_SID not started: $output"
> -             return $OCF_ERR_GENERIC
>       fi
>  }
>  
> @@ -562,13 +591,21 @@ killprocs() {
>  killprocs() {
>       local sig=$1
>       shift 1
> -     kill -$sig $* >/dev/null 2>&1
> +     # Record stderr
> +     kill -$sig $* >/dev/null
>  }
>  ora_kill() {
> -     killprocs TERM `eval $procs | awk '{print $1}'`
> +     oraprocs=`eval $procs | awk '{print $1}'`
> +     if [ -z "$oraprocs" ]; then
> +             ocf_log debug "All oracle processes are already stopped."
> +             return
> +     fi
> +     killprocs TERM $oraprocs
>       for i in 1 2 3 4 5; do
> -             killprocs 0 `eval $procs | awk '{print $1}'` ||
> +             if [ -z "`eval $procs | awk '{print $1}'`" ]; then
> +                     ocf_log debug "All oracle processes are killed."
>                       return
> +             fi
>               sleep 5
>       done
>       killprocs KILL `eval $procs | awk '{print $1}'`
> @@ -578,14 +615,16 @@ ora_kill() {
>  # oracle_monitor: Can the Oracle instance do anything useful?
>  #
>  oracle_monitor() {
> -     if is_oracle_up && instance_live
> -     then
> -             #ocf_log info "Oracle instance $ORACLE_SID is alive"
> -             return $OCF_SUCCESS
> -     else
> +     if ! is_oracle_up; then
> +             ocf_log info "Oracle process is down"
> +             return $OCF_NOT_RUNNING
> +     fi
> +     if ! instance_live; then
>               ocf_log info "Oracle instance $ORACLE_SID is down"
>               return $OCF_NOT_RUNNING
>       fi
> +     #ocf_log info "Oracle instance $ORACLE_SID is alive"
> +     return $OCF_SUCCESS
>  }
>  
>  #
> @@ -675,10 +714,10 @@ case "$1" in
>  
>    status)    if is_oracle_up
>               then
> -               echo Oracle instance $ORACLE_SID is running
> +               ocf_log info "Oracle instance $ORACLE_SID is running"
>                 exit $OCF_SUCCESS
>               else
> -               echo Oracle instance $ORACLE_SID is stopped
> +               ocf_log info "Oracle instance $ORACLE_SID is stopped"
>                 exit $OCF_NOT_RUNNING
>               fi
>               ;;

> # HG changeset patch
> # User r...@prec370b
> # Date 1264145083 -32400
> # Node ID b36abb1554500f4bdf33858989205d02606c609d
> # Parent  0ecef9560522601936888ce168dae5f563662402
> oralsnr: improve logging
> 
> diff -r 0ecef9560522 -r b36abb155450 heartbeat/oralsnr
> --- a/heartbeat/oralsnr       Fri Jan 22 16:23:41 2010 +0900
> +++ b/heartbeat/oralsnr       Fri Jan 22 16:24:43 2010 +0900
> @@ -158,11 +158,23 @@ ora_info() {
>  
>  testoraenv() {
>       #       Let's make sure a few important things are set...
> -     [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] ||
> +     if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then
> +             ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL. 
> ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER."
>               return 1
> +     fi
>       #       and some important things are there
> -     [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] ||
> +     if [ ! -x "$sqlplus" ]; then
> +             ocf_log err "Executeble sqlplus command($sqlplus) dose not 
> exist."
>               return 1
> +     fi
> +     if [ ! -x "$lsnrctl" ]; then
> +             ocf_log err "Executeble lsnrctl command($lsnrctl) dose not 
> exist."
> +             return 1
> +     fi
> +     if [ ! -x "$tnsping" ]; then
> +             ocf_log err "Executeble tnsping command($tnsping) dose not 
> exist."
> +             return 1
> +     fi
>       return 0
>  }
>  
> @@ -247,10 +259,17 @@ oralsnr_stop() {
>  # kill the listener procs
>  # give them 10 secs to exit cleanly (5 times 2)
>  oralsnr_kill() {
> -     killprocs TERM `eval $procs | awk '{print $1}'`
> +     oraprocs=`eval $procs | awk '{print $1}'`
> +     if [ -z "$oraprocs" ]; then
> +             ocf_log debug "All oralsnr processes are already stopped."
> +             return
> +     fi
> +     killprocs TERM $oraprocs
>       for i in 1 2 3 4 5; do
> -             killprocs 0 `eval $procs | awk '{print $1}'` ||
> +             if [ -z "`eval $procs | awk '{print $1}'`" ]; then
> +                     ocf_log debug "All oralsnr processes are killed."
>                       return
> +             fi
>               sleep 2
>       done
>       killprocs KILL `eval $procs | awk '{print $1}'`
> @@ -258,7 +277,8 @@ killprocs() {
>  killprocs() {
>       sig=$1
>       shift 1
> -     kill -$sig $* >/dev/null 2>&1
> +     # Record stderr
> +     kill -$sig $* >/dev/null
>  }
>  
>  #
> @@ -269,11 +289,23 @@ is_oralsnr_up() {
>       [ x != "x`eval $procs`" ]
>  }
>  oralsnr_status() {
> -     $lsnrctl status $listener | tail -1 | grep -qs 'completed successfully'
> +     output=`$lsnrctl status $listener`
> +     echo "$output" | tail -1 | grep -qs 'completed successfully'
> +     RET=$?
> +     if [ $RET -ne 0 ]; then
> +             ocf_log info "$listener status failed: $output"
> +     fi
> +     return $RET
>  }
>  # and does it work?
>  tnsping() {
> -     $tnsping $ORACLE_SID | tail -1 | grep -qs '^OK'
> +     output=`$tnsping $ORACLE_SID`
> +     echo "$output" | tail -1 | grep -qs '^OK'
> +     RET=$?
> +     if [ $RET -ne 0 ]; then
> +             ocf_log info "$tnsping $ORACLE_SID failed: $output"
> +     fi
> +     return $RET
>  }
>  
>  #
> @@ -285,10 +317,10 @@ oralsnr_monitor() {
>               : good
>               #ocf_log info "Listener $listener running"
>               return $OCF_SUCCESS
> -    else
> +     else
>               ocf_log info "Listener $listener not running"
>               return $OCF_NOT_RUNNING
> -    fi
> +     fi
>  }
>  
>  #
> @@ -368,10 +400,10 @@ case "$1" in
>  
>    status)    if oralsnr_status
>               then
> -               echo Listener $listener is running
> +               ocf_log info "Listener $listener is running"
>                 exit $OCF_SUCCESS
>               else
> -               echo Listener $listener is stopped
> +               ocf_log info "Listener $listener is stopped"
>                 exit $OCF_NOT_RUNNING
>               fi
>               ;;

> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/

_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/

Reply via email to