Hello,

I'm implementing an HA solution using Pacemaker and I've made some
changes to the mysql RA. Maybe you will be interested in some of them.
Patch attached. List of changes:
 * [bugfix] monitor return $OCF_RUNNING_MASTER on master
 * [bugfix] slave info collected with replication user
 * [bugfix] cut ending space from OCF_* host lists
 * [doc] suggest --skip-slave-start option
 * [feature] detailed logging on errors
 * [feature] setup replication on late slave start
 * [feature] another concept of M/S replication - try to keep state

Some explanation about the last one: in a dual-node mysql setup there is
no need to reset the master after a topology change. You only need to
store the last log_file and log_pos; when the master is demoted (or is
started as a slave) it can continue from that last position (because the
slave - now the new master - was in read-only mode). This also helps to
avoid losing data, e.g. in this scenario:
 - slave maintenance shutdown
 - some time later master reboot (yes - no mysql left for a moment)
   -> this resets master
 - slave startup
   -> replication starts from the point of the master reboot, losing data
      from the time after the slave shutdown and before the master reboot

The main concept is to NOT use "RESET MASTER". If the positions become
desynchronised, manual intervention will be needed. With the original
version you may not even notice that some inserts are missing from the
replicas!

This concept works only in a dual-node mysql setup (namely: when a slave
has only one choice of master). In a setup with more nodes it will need
manual synchronization (or the original version with RESET MASTER - with
the warning above).

Patch is also available here:
http://marmarek.w.staszic.waw.pl/patches/mysql-repl.patch

-- 
Best Regards,
Marek Marczykowski          |   gg:2873965      | RLU #390519
marmarek at staszic waw pl  | xmpp:marmarek at staszic waw pl

Some features and bugfixes by Marek Marczykowski <marma...@staszic.waw.pl>
 * [bugfix] monitor return $OCF_RUNNING_MASTER on master
 * [bugfix] slave info collected with replication user
 * [bugfix] cut ending space from OCF_* host lists
 * [doc] suggest --skip-slave-start option
 * [feature] detailed logging on errors
 * [feature] setup replication on late slave start
 * [feature] another concept of M/S replication - try to keep state

The last thing works only for a dual-node mysql setup.

--- mysql-repl.orig     2010-06-27 01:15:42.057546595 +0200
+++ mysql-repl  2010-06-28 01:14:31.929142055 +0200
@@ -76,6 +76,7 @@
 OCF_RESKEY_replication_port_default="3306"
 OCF_RESKEY_max_slave_lag_default="3600"
 OCF_RESKEY_evict_outdated_slaves_default="false"
+OCF_RESKEY_state_default=${HA_RSCTMP}/Mysql-repl-${OCF_RESOURCE_INSTANCE}.state
 
 : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
 MYSQL_BINDIR=`dirname ${OCF_RESKEY_binary}`
@@ -106,6 +107,8 @@
 : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
 : 
${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
 
+: ${OCF_RESKEY_state=${OCF_RESKEY_state_default}}
+
 #######################################################################
 
 usage() {
@@ -248,6 +251,7 @@
 <longdesc lang="en">
 Additional parameters which are passed to the mysqld on startup.
 (e.g. --skip-external-locking or --skip-grant-tables)
+On M/S setup --skip-slave-start is needed (or in config file).
 </longdesc>
 <shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
 <content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
@@ -307,6 +311,15 @@
 <content type="boolean" default="${OCF_RESKEY_evict_outdated_slaves_default}" 
/>
 </parameter>
 
+<parameter name="state" unique="1">
+<longdesc lang="en">
+Location to store the mysql replication state in.
+</longdesc>
+<shortdesc lang="en">State file</shortdesc>
+<content type="string" default="${OCF_RESKEY_state_default}" />
+</parameter>
+
+
 </parameters>
 
 <actions>
@@ -386,37 +399,48 @@
     return 1
 }
 
-check_slave() {
-    # Checks slave status
-    local rc
-    local tmpfile
+get_slave_info() {
+    # Warning: this sets $tmpfile and LEAVE this file! You must delete it 
after use!
+
     local mysql_options
 
-    rc=1
     tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
 
-    mysql_options="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user 
--password=$OCF_RESKEY_test_passwd"
+    mysql_options="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user 
--password=$OCF_RESKEY_replication_passwd"
 
     $MYSQL $mysql_options \
         -e 'SHOW SLAVE STATUS\G' > $tmpfile
 
-    local master_host
-    local master_user
-    local master_port
-    local slave_sql
-    local slave_io
-    local last_errno
-    local secs_behind
-
     if [ -s $tmpfile ]; then
-       master_host=`sed -ne 's/^.*Master_Host: \(.*\)$/\1/p' < $tmpfile`
-       master_user=`sed -ne 's/^.*Master_User: \(.*\)$/\1/p' < $tmpfile`
-       master_port=`sed -ne 's/^.*Master_Port: \(.*\)$/\1/p' < $tmpfile`
-       slave_sql=`sed -ne 's/^.*Slave_SQL_Running: \(.*\)$/\1/p' < $tmpfile`
-       slave_io=`sed -ne 's/^.*Slave_IO_Running: \(.*\)$/\1/p' < $tmpfile`
-       last_errno=`sed -ne 's/^.*Last_Errno: \(.*\)$/\1/p' < $tmpfile`
-       secs_behind=`sed -ne 's/^.*Seconds_Behind_Master: \(.*\)$/\1/p' < 
$tmpfile`
+       master_host=`sed -ne 's/^.* Master_Host: \(.*\)$/\1/p' < $tmpfile`
+       master_user=`sed -ne 's/^.* Master_User: \(.*\)$/\1/p' < $tmpfile`
+       master_port=`sed -ne 's/^.* Master_Port: \(.*\)$/\1/p' < $tmpfile`
+       master_log_file=`sed -ne 's/^.* Master_Log_File: \(.*\)$/\1/p' < 
$tmpfile`
+       master_log_pos=`sed -ne 's/^.* Read_Master_Log_Pos: \(.*\)$/\1/p' < 
$tmpfile`
+       slave_sql=`sed -ne 's/^.* Slave_SQL_Running: \(.*\)$/\1/p' < $tmpfile`
+       slave_io=`sed -ne 's/^.* Slave_IO_Running: \(.*\)$/\1/p' < $tmpfile`
+       last_errno=`sed -ne 's/^.* Last_Errno: \(.*\)$/\1/p' < $tmpfile`
+       secs_behind=`sed -ne 's/^.* Seconds_Behind_Master: \(.*\)$/\1/p' < 
$tmpfile`
 
+        ocf_log debug "MySQL instance running as a replication slave"
+    else
+        # Instance produced an empty "SHOW SLAVE STATUS" output --
+        # instance is not a slave
+       ocf_log err "check_slave invoked on an instance that is not a 
replication slave."
+       return $OCF_ERR_GENERIC
+    fi
+
+    return $OCF_SUCCESS
+}
+
+check_slave() {
+    # Checks slave status
+    local rc
+
+    get_slave_info
+    rc=$?
+
+    if [ $rc -eq 0 ]; then
        if [ $last_errno -ne 0 ]; then
            # Whoa. Replication ran into an error. This slave has
            # diverged from its master. Make sure this resource
@@ -458,7 +482,7 @@
            master_pref=$((${OCF_RESKEY_max_slave_lag}-${secs_behind}))
            if [ $master_pref -lt 0 ]; then
                # Sanitize a below-zero preference to just zero
-               $master_pref=0
+               master_pref=0
            fi
            $CRM_MASTER -v $master_pref
        fi
@@ -475,18 +499,42 @@
 }
 
 set_master() {
+    local new_master_host
+    local master_params
+
+    new_master_host=$1
+
+    # Keep replication position
+    get_slave_info
+
+    if [ "$master_log_file" -a "$new_master_host" = "$master_host" ]; then
+        master_params=", MASTER_LOG_FILE='$master_log_file', \
+                         MASTER_LOG_POS=$master_log_pos"
+        ocf_log debug "Kept master pos for $master_host : 
$master_log_file:$master_log_pos"
+    elif [ -r "$OCF_RESKEY_state" ]; then
+        master_host=
+        . $OCF_RESKEY_state
+        if [ "$new_master_host" = "$master_host" ]; then
+                master_params=", MASTER_LOG_FILE='$master_log_file', \
+                                 MASTER_LOG_POS=$master_log_pos"
+                 ocf_log debug "Restored master pos for $master_host : 
$master_log_file:$master_log_pos"
+        fi
+     fi
+
     # Informs the MySQL server of the master to replicate
     # from. Accepts one mandatory argument which must contain the host
     # name of the new master host. The master must either be unchanged
     # from the laste master the slave replicated from, or freshly
     # reset with RESET MASTER.
-    local master_host
-    master_host=$1
 
     ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
-       -e "CHANGE MASTER TO MASTER_HOST='$master_host', \
+       -e "CHANGE MASTER TO MASTER_HOST='$new_master_host', \
                              MASTER_USER='$OCF_RESKEY_replication_user', \
-                             MASTER_PASSWORD='$OCF_RESKEY_replication_passwd'"
+                             MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' 
$master_params"
+
+    # Remove state file - it will be invalid after SLAVE START
+    rm -f $OCF_RESKEY_state
+    rm -f $tmpfile
 }
 
 unset_master(){
@@ -511,7 +559,12 @@
     # First, stop the slave I/O thread and wait for relay log
     # processing to complete
     ocf_run $MYSQL $mysql_options \
-       -e "STOP SLAVE IO_THREAD" || exit $OCF_ERR_GENERIC
+       -e "STOP SLAVE IO_THREAD"
+    if [ $? -gt 0 ]; then
+       ocf_log err "Error stopping slave IO thread"
+       exit $OCF_ERR_GENERIC
+    fi
+
     while true; do
        $MYSQL $mysql_options \
            -e 'SHOW PROCESSLIST\G' > $tmpfile
@@ -526,9 +579,27 @@
 
     # Now, stop all slave activity and unset the master host
     ocf_run $MYSQL $mysql_options \
-       -e "STOP SLAVE" || exit $OCF_ERR_GENERIC
+       -e "STOP SLAVE"
+    if [ $? -gt 0 ]; then
+       ocf_log err "Error stopping rest slave threads"
+       exit $OCF_ERR_GENERIC
+    fi
+    
+    #Save current state
+    get_slave_info
+    cat <<EOF > $OCF_RESKEY_state
+master_host="$master_host"
+master_log_file="$master_log_file"
+master_log_pos="$master_log_pos"
+EOF
+    rm -f $tmpfile
+
     ocf_run $MYSQL $mysql_options \
-       -e "CHANGE MASTER TO MASTER_HOST=''" || exit $OCF_ERR_GENERIC
+       -e "CHANGE MASTER TO MASTER_HOST=''" 
+    if [ $? -gt 0 ]; then
+           ocf_log err "Failed to set master"
+           exit $OCF_ERR_GENERIC
+    fi
 }
 
 #######################################################################
@@ -596,7 +667,6 @@
        return $rc
     fi
 
-
     if [ $OCF_CHECK_LEVEL -gt 0 ]; then
        # Check if this instance is configured as a slave, and if so
        # check slave status
@@ -618,8 +688,13 @@
        fi
     fi
 
-    ocf_log info "MySQL monitor succeeded";
-    return $OCF_SUCCESS
+    if [ "$OCF_RESKEY_CRM_meta_role" = "Master" ]; then
+           ocf_log info "MySQL monitor succeeded (master)";
+           return $OCF_RUNNING_MASTER
+    else
+           ocf_log info "MySQL monitor succeeded";
+           return $OCF_SUCCESS
+    fi
 }
 
 mysql_start() {
@@ -710,6 +785,19 @@
        # don't know what master to replicate from), we simply start
        # in read only mode.
        set_read_only on
+
+       master_host=`echo $OCF_RESKEY_CRM_meta_notify_master_uname|tr -d " "`
+       if [ "$master_host" -a "$master_host" != `uname -n` ]; then
+           ocf_log info "Changing MySQL configuration to replicate from 
$master_host."
+           set_master $master_host
+           ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
+               -e "SLAVE START"
+           if [ $? -ne 0 ]; then
+               ocf_log err "Failed to start slave";
+               return $OCF_ERR_GENERIC;
+           fi
+       fi
+
        # We also need to set a master preference, otherwise Pacemaker
        # won't ever promote us in the absence of any explicit
        # preference set by the administrator. We choose a low
@@ -772,6 +860,8 @@
     if ( ! mysql_status ); then
        return $OCF_NOT_RUNNING
     fi
+    ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
+       -e "SLAVE STOP"
     set_read_only off || return $OCF_ERR_GENERIC
 
     # Existing master gets a higher-than-default master preference, so
@@ -783,7 +873,15 @@
 }
 
 mysql_demote() {
-    set_read_only on || return $OCF_ERR_GENERIC
+    if ( ! mysql_status ); then
+       return $OCF_NOT_RUNNING
+    fi
+
+    set_read_only on
+    if [ $? -ne 0 ]; then
+       ocf_log err "Failed to set read-only";
+       return $OCF_ERR_GENERIC;
+    fi
 
     # Return master preference to default, so the cluster manager gets
     # a chance to select a new master
@@ -811,7 +909,7 @@
            # connect to it and wait for it to start replicating.
            local master_host
            local master_status
-           master_host=$OCF_RESKEY_CRM_meta_notify_promote_uname
+           master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname|tr -d " 
"`
 
            if ( ! mysql_status ); then
                return $OCF_NOT_RUNNING
@@ -822,30 +920,35 @@
            fi
 
            if [ $master_host = `uname -n` ]; then
-               ocf_log info "Resetting MySQL replication configuration on new 
master $master_host"
-               ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
-                   -e 'RESET MASTER'
+               ocf_log info "I will be new master"
+               #ocf_log info "Resetting MySQL replication configuration on new 
master $master_host"
+               #ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
+               #    -e 'RESET MASTER'
            else
                ocf_log info "Changing MySQL configuration to replicate from 
$master_host"
                set_master $master_host
            fi
+           return $OCF_SUCCESS
            ;;
        'post-promote')
            # The master has completed its promotion. Now is a good
            # time to check whether our replication slave is working
            # correctly.
-           if [ $OCF_RESKEY_CRM_meta_notify_promote_uname = `uname -n` ]; then
+           master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname|tr -d " 
"`
+           if [ "$master_host" = `uname -n` ]; then
                ocf_log info "Ignoring post-promote notification for my own 
promotion."
                return $OCF_SUCCESS
            fi
            ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
-               -e 'START SLAVE';
+               -e 'START SLAVE';
            ;;
        'post-demote')
-           if [ $OCF_RESKEY_CRM_meta_notify_demote_uname = `uname -n` ]; then
+           demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " 
"`
+           if [ $demote_host = `uname -n` ]; then
                ocf_log info "Ignoring post-demote notification for my own 
demotion."
                return $OCF_SUCCESS
            fi
+           ocf_log info "post-demote notification for $demote_host."
            # The former master has just been gracefully demoted.
            unset_master
            ;;

Attachment: smime.p7s
Description: S/MIME cryptographic signature

_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/

Reply via email to