Hi Dejan,
Here's another revised patch for the mysql agent, made against commit
4c18035 (git@github.com:y-trudeau/resource-agents.git, branch
mysql-repl). Following Keisuke's comment, I restored the log level
for mysql_status in probe mode.
Regards,
Yves
--- ../../mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
+++ mysql 2012-05-04 15:58:50.318419875 -0400
@@ -79,6 +79,7 @@
OCF_RESKEY_max_slave_lag_default="3600"
OCF_RESKEY_evict_outdated_slaves_default="false"
OCF_RESKEY_reader_attribute_default="readable"
+OCF_RESKEY_replication_info_attribute_default="replication_info"
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
MYSQL_BINDIR=`dirname ${OCF_RESKEY_binary}`
@@ -109,7 +110,8 @@
: ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
:
${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
-: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
+: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
+:
${OCF_RESKEY_replication_info_attribute=${OCF_RESKEY_replication_info_attribute_default}}
#######################################################################
@@ -328,7 +330,19 @@
</longdesc>
<shortdesc lang="en">Sets the node attribute that determines
whether a node is usable for clients to read from.</shortdesc>
-<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
+<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
+</parameter>
+
+<parameter name="replication_info_attribute" unique="1" required="0">
+<longdesc lang="en">
+An attribute that stores the current master IP, replication file and position.
+This is queried by the agent in the post-promote notification
+to reconnect the slaves to the new master.
+
+This parameter is only meaningful in master/slave set configurations.
+</longdesc>
+<shortdesc lang="en">Cluster attribute storing replication
information</shortdesc>
+<content type="string"
default="${OCF_RESKEY_replication_info_attribute_default}" />
</parameter>
</parameters>
@@ -355,10 +369,12 @@
MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user
--password=$OCF_RESKEY_replication_passwd"
MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user
--password=$OCF_RESKEY_test_passwd"
+MYSQL_TOO_MANY_CONN_ERR=1040
CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
HOSTNAME=`uname -n`
CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
+CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name
${OCF_RESKEY_replication_info_attribute} -s mysql_replication --query -q"
INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
#######################################################################
@@ -468,7 +484,7 @@
if [ $rc -eq 0 ]; then
# Did we receive an error other than max_connections?
- if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
+ if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR"
]; then
# Whoa. Replication ran into an error. This slave has
# diverged from its master. Make sure this resource
# doesn't restart in place.
@@ -484,7 +500,7 @@
fi
# If we got max_connections, let's remove the vip
- if [ $last_errno -eq 1040 ]; then
+ if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
set_reader_attr 0
exit $OCF_SUCCESS
fi
@@ -496,7 +512,7 @@
ocf_log warn "MySQL Slave IO threads currently not running."
# Sanity check, are we at least on the right master
- new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config
--name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
+ new_master_IP=`$CRM_ATTR_REPL_INFO | cut -d'|' -f1`
if [ "$master_host" != "$new_master_IP" ]; then
# Not pointing to the right master, not good, removing the VIPs
@@ -573,7 +589,7 @@
local new_master_IP master_log_file master_log_pos
local master_params
- new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name
replication_info -s mysql_replication --query -q | cut -d'|' -f1`
+ new_master_IP=`$CRM_ATTR_REPL_INFO | cut -d'|' -f1`
# Keep replication position
get_slave_info
@@ -585,8 +601,8 @@
rm -f $tmpfile
return
else
- master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name
replication_info -s mysql_replication --query -q | cut -d'|' -f2`
- master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name
replication_info -s mysql_replication --query -q | cut -d'|' -f3`
+ master_log_file=`$CRM_ATTR_REPL_INFO | cut -d'|' -f2`
+ master_log_pos=`$CRM_ATTR_REPL_INFO | cut -d'|' -f3`
if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
master_params=", MASTER_LOG_FILE='$master_log_file', \
MASTER_LOG_POS=$master_log_pos"
@@ -660,7 +676,7 @@
while true; do
$MYSQL $MYSQL_OPTIONS_REPL \
-e 'SHOW PROCESSLIST\G' > $tmpfile
- if grep 'Has read all relay log' $tmpfile >/dev/null; then
+ if grep -i 'Has read all relay log' $tmpfile >/dev/null; then
ocf_log info "MySQL slave has finished processing relay log"
break
fi
@@ -783,7 +799,7 @@
mysql_status() {
if [ ! -e $OCF_RESKEY_pid ]; then
- ocf_log err "MySQL is not running"
+ ocf_log $1 "MySQL is not running"
return $OCF_NOT_RUNNING;
fi
@@ -797,7 +813,7 @@
if [ $? -eq 0 ]; then
return $OCF_SUCCESS;
else
- ocf_log err "MySQL not running: removing old PID file"
+ ocf_log $1 "MySQL not running: removing old PID file"
rm -f $OCF_RESKEY_pid
return $OCF_NOT_RUNNING;
fi
@@ -811,8 +827,9 @@
if ocf_is_probe; then
status_loglevel="info"
fi
-
+
mysql_status $status_loglevel
+
rc=$?
# TODO: check max connections error
@@ -856,7 +873,7 @@
set_reader_attr 0
fi
- mysql_status
+ mysql_status info
if [ $? = $OCF_SUCCESS ]; then
ocf_log info "MySQL already running"
return $OCF_SUCCESS
@@ -930,7 +947,7 @@
# Let the CRM/LRM time us out if required.
start_wait=1
while [ $start_wait = 1 ]; do
- mysql_status
+ mysql_status info
rc=$?
if [ $rc = $OCF_SUCCESS ]; then
start_wait=0
@@ -1019,7 +1036,7 @@
count=0
while [ $count -lt $shutdown_timeout ]
do
- mysql_status
+ mysql_status info
rc=$?
if [ $rc = $OCF_NOT_RUNNING ]; then
break
@@ -1029,7 +1046,7 @@
ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
done
- mysql_status
+ mysql_status info
if [ $? != $OCF_NOT_RUNNING ]; then
ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using
SIGTERM. Trying SIGKILL..."
/bin/kill -KILL $pid > /dev/null
@@ -1044,7 +1061,7 @@
mysql_promote() {
local master_info
- if ( ! mysql_status ); then
+ if ( ! mysql_status err ); then
return $OCF_NOT_RUNNING
fi
ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
@@ -1053,7 +1070,7 @@
# Set Master Info in CIB, cluster level attribute
update_data_master_status
master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status
Position)"
- ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s
mysql_replication -v "$master_info"
+ ${HA_SBIN_DIR}/crm_attribute --type crm_config --name
${OCF_RESKEY_replication_info_attribute} -s mysql_replication -v "$master_info"
rm -f $tmpfile
set_read_only off || return $OCF_ERR_GENERIC
@@ -1070,7 +1087,7 @@
}
mysql_demote() {
- if ! mysql_status; then
+ if ! mysql_status err; then
return $OCF_NOT_RUNNING
fi
@@ -1177,6 +1194,7 @@
# The log directory must be a directory owned by root, with permissions 0700,
# and the log must be writable and not a symlink.
##########################################################################
+DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
if [ -d "${DEBUG_LOG_DIR}" ]; then
@@ -1214,7 +1232,7 @@
case "$1" in
start) mysql_start;;
stop) mysql_stop;;
- status) mysql_status;;
+ status) mysql_status err;;
monitor) mysql_monitor;;
promote) mysql_promote;;
demote) mysql_demote;;
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/