OK. I notice you're running drbd 8.3. The scripts distributed with pacemaker 1.0.1 only worked with 8.2 (I'm not sure about 1.0.2). I made a few tiny changes to the drbd script so it would run with 8.3. I've attached the changed script. I also added more logging so you can see exactly what is going on in the drbd OCF script at debug level. You probably want to change your logging to debug to get all
the output while trying to figure this out.

However, the errors I got from the script were very different from yours: I was getting errors
from drbdadm, not failures to load the driver.

The other thing I did was run the OCF scripts "by hand" (you have to set a bunch of env variables). I can't find the script I used to test drbd, but I've attached one I used for mysql; you should be able to adapt it to
your use.  As always, remember "bash -x" is your friend.

   Neil

Jerome Yanga wrote:
Hi Neil!

Yes.  DRBD works outside of Pacemaker.  When I do a "service drbd start" on each node, 
drbd runs properly and both nodes are "Secondary".

jerome
#!/bin/sh
#
#
#       OCF Resource Agent compliant drbd resource script.
#
# Copyright (c) 2004 - 2007 SUSE LINUX Products GmbH, Lars Marowsky-Bree
#                    All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like.  Any license provided herein, whether implied or
# otherwise, applies only to this software file.  Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#

# OCF instance parameters
#       OCF_RESKEY_drbd_resource
#       OCF_RESKEY_drbdconf
#       OCF_RESKEY_CRM_meta_clone_max
#       OCF_RESKEY_CRM_meta_clone_node_max
#       OCF_RESKEY_master_max
#       OCF_RESKEY_master_node_max


#######################################################################
# Initialization:

# Source the shell function library this agent relies on. When
# OCF_DEBUG_LIBRARY is set and non-empty it names the file to source instead
# of the stock .ocf-shellfuncs below OCF_ROOT.
# The paths are quoted so that a directory name containing whitespace is
# passed to "." as a single argument.
if [ -n "$OCF_DEBUG_LIBRARY" ]; then
        . "$OCF_DEBUG_LIBRARY"
else
        . "${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"
fi

#######################################################################

# Print the resource agent's meta-data (an XML document describing the
# agent, its parameters and its actions) on stdout.
#
# This function does not return to its caller: after printing it exits the
# script with $OCF_SUCCESS.
meta_data() {
        cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="drbd">
<version>1.1</version>

<longdesc lang="en">
Master/Slave OCF Resource Agent for DRBD
</longdesc>

<shortdesc lang="en">This resource agent manages a Distributed
Replicated Block Device (DRBD) object as a master/slave
resource. DRBD is a mechanism for replicating storage; please see the
documentation for setup details.</shortdesc>

<parameters>
<parameter name="drbd_resource" unique="1" required="1">
<longdesc lang="en">
The name of the drbd resource from the drbd.conf file.
</longdesc>
<shortdesc lang="en">drbd resource name</shortdesc>
<content type="string" default="drbd0" />
</parameter>

<parameter name="drbdconf">
<longdesc lang="en">
Full path to the drbd.conf file.
</longdesc>
<shortdesc lang="en">Path to drbd.conf</shortdesc>
<content type="string" default="/etc/drbd.conf"/>
</parameter>

<parameter name="clone_overrides_hostname">
<longdesc lang="en">
Whether or not to override the hostname with the clone number. This can
be used to create floating peer configurations; drbd will be told to
use node_&#60;cloneno&#62; as the hostname instead of the real uname,
which can then be used in drbd.conf.
</longdesc>
<shortdesc lang="en">Override drbd hostname</shortdesc>
<content type="boolean" default="no"/>
</parameter>


<parameter name="clone_max" required="1">
<longdesc lang="en">
Number of clones of this drbd resource. Do not fiddle with the default.
</longdesc>
<shortdesc lang="en">Number of clones</shortdesc>
<content type="integer" default="2"/>
</parameter>

<parameter name="clone_node_max" required="1">
<longdesc lang="en">
Clones per node. Do not fiddle with the default.
</longdesc>
<shortdesc lang="en">Number of nodes</shortdesc>
<content type="integer" default="1"/>
</parameter>

<parameter name="master_max" required="1">
<longdesc lang="en">
Maximum number of active primaries. Do not fiddle with the default.
</longdesc>
<shortdesc lang="en">Number of primaries</shortdesc>
<content type="integer" default="1"/>
</parameter>

<parameter name="master_node_max" required="1">
<longdesc lang="en">
Maximum number of primaries per node. Do not fiddle with the default.
</longdesc>
<shortdesc lang="en">Number of primaries per node</shortdesc>
<content type="integer" default="1"/>
</parameter>
</parameters>

<actions>
<action name="start"   timeout="240" />
<action name="promote"   timeout="90" />
<action name="demote"   timeout="90" />
<action name="notify"   timeout="90" />
<action name="stop"    timeout="100" />
<action name="monitor" depth="0"  timeout="20" interval="20" start-delay="1m" 
role="Slave" />
<action name="monitor" depth="0"  timeout="20" interval="10" start-delay="1m" 
role="Master" />
<action name="meta-data"  timeout="5" />
<action name="validate-all"  timeout="30" />
</actions>
</resource-agent>
END

        # Terminates the whole script, not just this function.
        exit $OCF_SUCCESS
}

# Run an arbitrary command, log what happened and hand the result back.
#
# Arguments: the command followed by its arguments, one word each.
# Globals:   RESOURCE (read; used only as a log prefix)
# Outputs:   the command's combined stdout/stderr on stdout
# Returns:   the exit status of the command
#
# A non-zero status is logged at "err" level, success at "debug" level.
do_cmd() {
        local cmd="$*"
        local cmd_out
        local ret

        ocf_log debug "$RESOURCE: Calling $cmd"

        # The assignment is kept separate from "local": with
        # "local v=$(...)" $? is the status of "local" itself (always 0),
        # which made every command look successful.
        # "$@" runs the words exactly as the caller passed them instead of
        # re-splitting the flattened string.
        cmd_out=$("$@" 2>&1)
        ret=$?

        if [ $ret -ne 0 ]; then
                ocf_log err "$RESOURCE: Called $cmd"
                ocf_log err "$RESOURCE: Exit code $ret"
                ocf_log err "$RESOURCE: Command output: $cmd_out"
        else
                ocf_log debug "$RESOURCE: Exit code $ret"
                ocf_log debug "$RESOURCE: Command output: $cmd_out"
        fi

        # Deliberately unquoted: the output is folded onto a single,
        # whitespace-normalised line, as it always has been.
        echo $cmd_out

        return $ret
}

# Run "$DRBDADM -c $DRBDCONF <arguments>", log the outcome and hand the
# cleaned-up output back.
#
# Arguments: the drbdadm sub-command and its arguments.
# Globals:   DRBDADM, DRBDCONF, RESOURCE (all read)
# Outputs:   drbdadm's combined stdout/stderr on stdout, folded onto one
#            line and with the "found __DRBD_NODE__ ... <<" noise removed
# Returns:   the exit status of drbdadm
do_drbdadm() {
        local cmd="$DRBDADM -c $DRBDCONF $*"
        local cmd_out
        local cmd_ret
        local ret

        ocf_log debug "$RESOURCE: Calling $cmd"

        # Assign separately from "local": "local v=$(...)" yields the status
        # of "local" (always 0), which hid every drbdadm failure from the
        # callers. $DRBDADM is left unquoted so it may still carry options.
        cmd_out=$($DRBDADM -c "$DRBDCONF" "$@" 2>&1)
        ret=$?

        # Trim the garbage drbdadm likes to print when using the node
        # override feature. The unquoted echo folds the output onto one
        # line first so the pattern can match across line breaks.
        cmd_ret=$(echo $cmd_out | sed -e 's/found __DRBD_NODE__.*<<//;')

        if [ $ret -ne 0 ]; then
                ocf_log err "$RESOURCE: Called $cmd"
                ocf_log err "$RESOURCE: Exit code $ret"
                ocf_log err "$RESOURCE: Command output: $cmd_ret"
        else
                ocf_log debug "$RESOURCE: Exit code $ret"
                ocf_log debug "$RESOURCE: Command output: $cmd_ret"
        fi

        # Unquoted on purpose: strips the blank left behind by the sed above.
        echo $cmd_ret

        return $ret
}

# Derive the agent's working variables from the OCF environment.
#
# Globals written: CRM_MASTER, RESOURCE, CLONE_NO, DRBDCONF, DRBDADM and,
#                  when clone_overrides_hostname is enabled, the exported
#                  __DRBD_NODE__.
# Globals read:    HA_SBIN_DIR, ACTION, OCF_RESKEY_drbd_resource,
#                  OCF_RESKEY_CRM_meta_clone, OCF_RESKEY_drbdconf,
#                  OCF_RESKEY_clone_overrides_hostname
#
# Exits the script with $OCF_NOT_RUNNING when the configuration file does
# not exist.
drbd_init() {
        CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "

        RESOURCE="$OCF_RESKEY_drbd_resource"
        CLONE_NO="$OCF_RESKEY_CRM_meta_clone"
        DRBDCONF="${OCF_RESKEY_drbdconf:=/etc/drbd.conf}"
        # Nothing visible in this script assigns DRBDADM; fall back to
        # looking drbdadm up in PATH unless the environment (or the sourced
        # library) already provides a value.
        # NOTE(review): confirm the intended default location of drbdadm.
        DRBDADM="${DRBDADM:-drbdadm}"

        # Logged only after RESOURCE is known so the prefix is not empty.
        ocf_log debug "$RESOURCE: drbd_init, ACTION $ACTION"

        if [ ! -f "$DRBDCONF" ]; then
                ocf_log err "drbd.conf not installed."
                # Double quotes so that $ACTION is actually expanded.
                if [ "$ACTION" = "monitor" ]; then
                        exit $OCF_NOT_RUNNING
                else
                        # exit $OCF_ERR_INSTALLED
                        exit $OCF_NOT_RUNNING
                fi
        fi

        # check_binary $DRBDADM

        case "$OCF_RESKEY_clone_overrides_hostname" in
        [Yy][Ee][Ss]|[Tt][Rr][Uu][Ee]|[Oo][Nn]|1)
                # drbdadm is told to use node_<clone number> as host name.
                __DRBD_NODE__="node_${CLONE_NO}"
                export __DRBD_NODE__
                ocf_log info "$RESOURCE: Using hostname $__DRBD_NODE__"
                ;;
        esac

}


#######################################################################

# Print a short usage summary for this agent on stdout.
drbd_usage() {
        printf '%s\n' \
                "usage: $0 {start|stop|monitor|validate-all|promote|demote|notify|meta-data}" \
                "" \
                "Expects to have a fully populated OCF RA-compliant environment set."
}

# Succeed (status 0) when /proc/drbd exists as a regular file, fail with
# status 1 otherwise.
is_drbd_enabled () {
        [ -f /proc/drbd ]
}


# Query drbdadm for the role and connection state of $RESOURCE and store
# sanitised values in globals.
#
# Globals read:    RESOURCE
# Globals written: DRBD_STATE        raw output of "drbdadm role"
#                  DRBD_STATE_LOCAL  text before the first "/" of DRBD_STATE
#                  DRBD_STATE_REMOTE text after the last "/" of DRBD_STATE
#                  DRBD_CSTATE       output of "drbdadm cstate"
#
# Values outside the lists below are replaced by "Not configured"
# (roles) or "Unconfigured" (connection state).
drbd_get_status() {
        DRBD_STATE=$(do_drbdadm role $RESOURCE)
        # Same split as the former "echo | sed" pipelines, without forking.
        DRBD_STATE_LOCAL=${DRBD_STATE%%/*}
        DRBD_STATE_REMOTE=${DRBD_STATE##*/}
        DRBD_CSTATE=$(do_drbdadm cstate $RESOURCE)

        # Sanitize the various states, drbdadm is quite annoying; so if it
        # outputs something which doesn't make sense, translate it into
        # a harmless state:

        case "$DRBD_STATE_LOCAL" in
                "Not configured"|"Primary"|"Secondary") ;;
                *)      DRBD_STATE_LOCAL="Not configured" ;;
        esac

        case "$DRBD_STATE_REMOTE" in
                "Primary"|"Secondary"|"Unknown") ;;
                *)      DRBD_STATE_REMOTE="Not configured" ;;
        esac

        case "$DRBD_CSTATE" in
                Unconfigured|StandAlone|Unconnected|Timeout|BrokenPipe) ;;
                NetworkFailure|WFConnection|WFReportParams|Connected|SkippedSyncS) ;;
                SkippedSyncT|WFBitMapS|WFBitMapT|SyncSource|SyncTarget) ;;
                PausedSyncS|PausedSyncT) ;;
                *) DRBD_CSTATE="Unconfigured" ;;
        esac

        # Kept on one line: the message used to contain a stray newline
        # introduced by line wrapping.
        ocf_log debug "$RESOURCE status: $DRBD_STATE $DRBD_STATE_LOCAL $DRBD_STATE_REMOTE $DRBD_CSTATE"
}

# Bring $RESOURCE up.
#
# Loads the drbd kernel module first when /proc/drbd is missing, then runs
# "drbdadm up" unless the resource is already configured.
#
# Returns: $OCF_SUCCESS when the resource was already configured or is in
#          Secondary state after "drbdadm up";
#          $OCF_ERR_GENERIC when the module cannot be loaded, "drbdadm up"
#          fails, or the resource is not Secondary afterwards.
drbd_start() {
        if is_drbd_enabled; then
                : OK
        else
                # The output of sh-mod-parms is left unquoted so each module
                # parameter becomes its own argument to modprobe.
                do_cmd modprobe -s drbd $($DRBDADM sh-mod-parms) || {
                        # No $'\n' suffix: that is a bashism and this
                        # script runs under /bin/sh.
                        ocf_log err "Cannot load the drbd module."
                        return $OCF_ERR_GENERIC
                }
                ocf_log debug "$RESOURCE start: Module loaded."
        fi

        drbd_get_status

        if [ "$DRBD_STATE_LOCAL" != "Not configured" ]; then
                ocf_log debug "$RESOURCE start: already configured."
                return $OCF_SUCCESS
        fi

        if do_drbdadm up $RESOURCE ; then
                # Re-read the state to verify what drbdadm actually did.
                drbd_get_status
                if [ "$DRBD_STATE_LOCAL" != "Secondary" ]; then
                        ocf_log err "$RESOURCE start: not in Secondary mode after start."
                        return $OCF_ERR_GENERIC
                fi

                ocf_log debug "$RESOURCE start: succeeded."
                return $OCF_SUCCESS
        else
                ocf_log err "$RESOURCE: Failed to start up."
                return $OCF_ERR_GENERIC
        fi
}

# Refresh the status and publish a master preference derived from the
# connection state via "$CRM_MASTER -v <score>".
#
# Always returns $OCF_SUCCESS.
drbd_update_prefs() {
        local score

        drbd_get_status

        # TODO: This is probably way too complex.
        # TODO:
        # (Inconsistent || Diskless && WFConnection) should be -infinity
        # The WFConnection score implies we'll try to promote even on
        # disconnected nodes, but that might not work.
        case $DRBD_CSTATE in
        Connected)
                score=75 ;;
        SyncSource|PausedSyncS|WFBitMapS|SkippedSyncS)
                score=100 ;;
        WFConnection)
                score=10 ;;
        *)
                score=5 ;;
        esac

        do_cmd $CRM_MASTER -v $score

        return $OCF_SUCCESS
}

# Take $RESOURCE down.
#
# Returns: $OCF_SUCCESS when drbd is not loaded, the resource was already
#          unconfigured, or it is unconfigured after "drbdadm down";
#          $OCF_ERR_GENERIC when "drbdadm down" fails or the resource is
#          still configured afterwards.
drbd_stop() {
        local down_rc

        # Do not bother if drbd is not enabled
        if ! is_drbd_enabled; then
                ocf_log debug "$RESOURCE stop: drbd not loaded."
                return $OCF_SUCCESS
        fi

        drbd_get_status

        # Clear preference for becoming master
        do_cmd $CRM_MASTER -D

        if [ "$DRBD_STATE_LOCAL" = "Not configured" ]; then
                ocf_log debug "$RESOURCE stop: already unconfigured."
                return $OCF_SUCCESS
        fi

        # TODO: this is a _force_ operation. we may need to kill higher
        # levels to be able to down drbd. figure out how...
        do_drbdadm down $RESOURCE
        down_rc=$?
        if [ $down_rc -ne 0 ]; then
                ocf_log err "$RESOURCE stop: Failed with exit code: $down_rc"
                return $OCF_ERR_GENERIC
        fi

        ocf_log debug "$RESOURCE stop: drbdadm down succeeded."

        # TODO: If drbdadm propagated error codes, this
        # wouldn't be needed.
        drbd_get_status

        if [ "$DRBD_STATE_LOCAL" = "Not configured" ]; then
                return $OCF_SUCCESS
        fi

        ocf_log err "$RESOURCE stop: Not stopped."
        return $OCF_ERR_GENERIC
}

# Report the current state of $RESOURCE.
#
# Returns: $OCF_NOT_RUNNING    drbd module not loaded, or resource not
#                              configured
#          $OCF_RUNNING_MASTER local role is Primary
#          $OCF_SUCCESS        local role is Secondary
#          $OCF_ERR_GENERIC    any other local role
drbd_monitor() {
        # TODO: Think about how to monitor drbd and what constitutes
        # failure cases...
        # diskless etc?
        # A secondary node which is supposed to be primary?

        # TODO: we ought to update the preferences here occasionally,
        # but that causes transitions right now ...

        if is_drbd_enabled; then
                : OK
        else
                ocf_log warn "$RESOURCE monitor: drbd module not loaded"
                return $OCF_NOT_RUNNING
        fi

        drbd_get_status

        case "$DRBD_STATE_LOCAL" in
        "Not configured")
                ocf_log debug "$RESOURCE monitor: resource not configured"
                return $OCF_NOT_RUNNING
                ;;
        "Primary")
#               drbd_update_prefs
                return $OCF_RUNNING_MASTER
                ;;
        "Secondary")
#               drbd_update_prefs
                return $OCF_SUCCESS
                ;;
        *)
                # One-line message; it used to contain a newline that came
                # from line wrapping.
                ocf_log err "$RESOURCE monitor: unexpected local state: $DRBD_STATE_LOCAL"
                ;;
        esac

        return $OCF_ERR_GENERIC
}

# Promote $RESOURCE to Primary.
#
# Returns: $OCF_SUCCESS     already Primary, or Primary after
#                           "drbdadm primary"
#          $OCF_ERR_GENERIC drbd not loaded, resource not Secondary to
#                           begin with, "drbdadm primary" failed, or the
#                           resource is not Primary afterwards
drbd_promote() {
        local rc

        if is_drbd_enabled; then
                : OK
        else
                ocf_log err "drbd is not enabled"
                return $OCF_ERR_GENERIC
        fi

        drbd_get_status

        if [ "$DRBD_STATE_LOCAL" = "Primary" ]; then
                ocf_log info "$RESOURCE promote: already primary"
                return $OCF_SUCCESS
        fi

        if [ "$DRBD_STATE_LOCAL" != "Secondary" ]; then
                ocf_log warn "$RESOURCE promote: Not secondary to start with."
                return $OCF_ERR_GENERIC
        fi

        do_drbdadm primary $RESOURCE
        rc=$?
        if [ $rc -ne 0 ]; then
                ocf_log err "$RESOURCE promote: Failed with exit code $rc."
                return $OCF_ERR_GENERIC
        fi

        # TODO: WORK AROUND because drbdadm has a bug and
        # reports success even if it failed :-(
        drbd_get_status

        if [ "$DRBD_STATE_LOCAL" = "Primary" ]; then
                ocf_log info "$RESOURCE promote: primary succeeded"
                return $OCF_SUCCESS
        fi

        # One-line message; it used to contain a newline that came from
        # line wrapping.
        ocf_log err "$RESOURCE promote: Not primary despite drbdadm call."
        return $OCF_ERR_GENERIC
}

# Demote $RESOURCE to Secondary.
#
# Returns: $OCF_SUCCESS     drbd module not loaded, already Secondary, or
#                           no longer Primary after "drbdadm secondary"
#          $OCF_NOT_RUNNING resource is not configured
#          $OCF_ERR_GENERIC "drbdadm secondary" failed, or the resource is
#                           still Primary afterwards
drbd_demote() {
        local rc

        if is_drbd_enabled; then
                : OK
        else
                # A stopped resource also is demoted.
                ocf_log debug "$RESOURCE demote: module not loaded"
                return $OCF_SUCCESS
        fi

        # The state must be read before it is inspected; without this call
        # DRBD_STATE_LOCAL is unset and none of the checks below can match.
        drbd_get_status

        if [ "$DRBD_STATE_LOCAL" = "Secondary" ]; then
                ocf_log debug "$RESOURCE demote: already secondary"
                return $OCF_SUCCESS
        fi
        if [ "$DRBD_STATE_LOCAL" = "Not configured" ]; then
                ocf_log debug "$RESOURCE demote: already stopped"
                return $OCF_NOT_RUNNING
        fi

        # TODO: this is a _force_ operation. we may need to kill higher
        # levels (or switch them to r/o) to be able to demote drbd.
        # figure out how...
        do_drbdadm secondary $RESOURCE
        rc=$?
        if [ $rc -ne 0 ]; then
                ocf_log err "$RESOURCE demote: Failed with exit code $rc."
                return $OCF_ERR_GENERIC
        fi

        # Re-read the state so the check reflects the result of the
        # demotion rather than the value from before it.
        drbd_get_status
        if [ "$DRBD_STATE_LOCAL" = "Primary" ]; then
                ocf_log err "$RESOURCE demote: still primary!"
                return $OCF_ERR_GENERIC
        fi

        ocf_log debug "$RESOURCE demote: succeeded"
        return $OCF_SUCCESS
}

# Handle a notification.
#
# Globals read: OCF_RESKEY_CRM_meta_notify_type,
#               OCF_RESKEY_CRM_meta_notify_operation,
#               OCF_RESKEY_CRM_meta_notify_active_resource,
#               OCF_RESKEY_CRM_meta_notify_stop_resource,
#               OCF_RESKEY_CRM_meta_notify_start_resource, RESOURCE
#
# On a "post" notification for "start" with exactly two active resources
# it runs "drbdadm connect" and "drbdadm wait_connect"; every "post"
# notification ends with drbd_update_prefs. "pre" notifications are only
# logged.
#
# Always returns $OCF_SUCCESS.
drbd_notify() {
        local n_type="$OCF_RESKEY_CRM_meta_notify_type"
        local n_op="$OCF_RESKEY_CRM_meta_notify_operation"
        # "set --" word-splits each list so $# yields its element count;
        # this only replaces the function's own positional parameters.
        set -- $OCF_RESKEY_CRM_meta_notify_active_resource
        local n_active="$#"
        set -- $OCF_RESKEY_CRM_meta_notify_stop_resource
        local n_stop="$#"
        set -- $OCF_RESKEY_CRM_meta_notify_start_resource
        local n_start="$#"

        # One-line message; it used to contain a newline that came from
        # line wrapping.
        ocf_log debug "$RESOURCE notify: $n_type for $n_op - counts: active $n_active - starting $n_start - stopping $n_stop"

        case $n_type in
        pre)
                case $n_op in
                promote) # TODO:
                         # Resist promotion of the other side in case we
                         # are already primary - though the CRM should
                         # not even attempt that.
                        ;;
                esac
                ;;
        post)   # TODO: Entire case statement which follows redundant?
                case $n_op in
                start)
                        if [ "$n_active" -eq 2 ]; then
                                # The other side is running, so we ought
                                # to connect and wait for that.
                                do_drbdadm connect $RESOURCE
                                do_drbdadm wait_connect $RESOURCE
                                # TODO: If this can cause a hang if the
                                # other side isn't connected or goes
                                # away during that, maybe just sleep
                                # here for 5-10s or take out the entire
                                # case statement
                        fi
                        ;;
                stop)
                        # TODO BUG: disconnect seems to force
                        # non-primary mode?!?
                        #### do_drbdadm disconnect $RESOURCE

                        # TODO: If we are secondary, do we need to do
                        # anything about a stopped primary in case we
                        # had an outdated flag...?
                        ;;
                esac

                # After something has been done is a good time to
                # recheck our status:
                drbd_update_prefs
                ;;
        esac

        return $OCF_SUCCESS
}

# Validate the agent's configuration.
#
# Globals read: DRBDCONF, RESOURCE, OCF_RESKEY_CRM_meta_clone_max,
#               OCF_RESKEY_CRM_meta_clone_node_max,
#               OCF_RESKEY_master_node_max, OCF_RESKEY_master_max
#
# Returns: $OCF_SUCCESS        everything checks out
#          $OCF_ERR_ARGS       no resource name given
#          $OCF_ERR_CONFIGURED configuration file missing, "drbdadm dump"
#                              fails for the resource, or the clone/master
#                              counts differ from 2/1/1/1
drbd_validate_all () {
        # First check the configuration file
        if [ -n "$DRBDCONF" ] && [ ! -f "$DRBDCONF" ]; then
                ocf_log err "Configuration file does not exist: $DRBDCONF"
                return $OCF_ERR_CONFIGURED
        fi

        # Check the resource name, it should appear in DRBDCONF
        if [ -z "$RESOURCE" ]; then
                ocf_log err "No resource name specified!"
                return $OCF_ERR_ARGS
        fi

        if do_drbdadm dump $RESOURCE 2>/dev/null ; then
                :
        else
                ocf_log err "Invalid configuration file $DRBDCONF"
                return $OCF_ERR_CONFIGURED
        fi

        # An unset value is compared as the required value. That keeps the
        # previous outcome (unset passed the check) but avoids the
        # "integer expression expected" noise "[" prints for an empty
        # operand.
        if [ "${OCF_RESKEY_CRM_meta_clone_max:-2}" -ne 2 ] \
         || [ "${OCF_RESKEY_CRM_meta_clone_node_max:-1}" -ne 1 ] \
         || [ "${OCF_RESKEY_master_node_max:-1}" -ne 1 ] \
         || [ "${OCF_RESKEY_master_max:-1}" -ne 1 ] ; then
                ocf_log err "Clone options misconfigured."
                # "return" like every other failure path here; the caller
                # turns the function's status into the script's status.
                return $OCF_ERR_CONFIGURED
        fi

        return $OCF_SUCCESS
}


# Entry point: exactly one argument, the action to perform.
if [ $# -ne 1 ]; then
        echo "Incorrect parameter count."
        drbd_usage
        exit $OCF_ERR_ARGS
fi

ACTION=$1
case "$ACTION" in
meta-data)      # meta_data exits the script itself.
                meta_data
                ;;
validate-all)   drbd_init
                drbd_validate_all
                # Explicit, so the script's status is the validation
                # result no matter what follows the case statement.
                exit $?
                ;;
start|stop|monitor|promote|demote|notify)
                if ocf_is_root ; then : ; else
                        # One-line message; it used to contain a newline
                        # that came from line wrapping.
                        ocf_log err "You must be root to perform this operation."
                        exit $OCF_ERR_PERM
                fi

                drbd_init
                # Dispatches to drbd_start, drbd_stop, drbd_monitor, ...
                drbd_$ACTION
                exit $?
                ;;
usage|help)     drbd_usage
                exit $OCF_SUCCESS
                ;;
*)              drbd_usage
                exit $OCF_ERR_ARGS
                ;;
esac

#! /bin/bash

# test mysql heartbeat ocf script
#
# Exports the environment variables set below and then runs the mysql
# resource agent under $OCF_ROOT, passing this script's own arguments
# through unchanged.

export OCF_ROOT=/usr/lib/ocf
export OCF_RA_VERSION_MAJOR=1
export OCF_RESOURCE_INSTANCE=mysql0
export OCF_RESKEY_datadir=/dbdata
export OCF_RESKEY_config=/dbdata/my.cnf
export OCF_RESKEY_binary=/usr/bin/mysqld_safe
export OCF_RESKEY_test_user=test
export OCF_RESKEY_test_table=information_schema.schemata
export OCF_RESKEY_OCF_CHECK_LEVEL=10
export HA_LOGFACILITY=daemon

# export OCF_RESKEY_additional_parameters=--skip-grant-tables

# The path is quoted so it stays a single word even if OCF_ROOT is changed
# to a directory containing whitespace.
"$OCF_ROOT/resource.d/heartbeat/mysql" "$@"
_______________________________________________
Linux-HA mailing list
Linux-HA@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems

Reply via email to