Hi Yves,

It would be good not to start a new thread for the same discussion.
On Thu, May 10, 2012 at 05:06:25PM -0400, Yves Trudeau wrote: > Hi Dejan, > here's another modified patch for the mysql agent of the commit > version 4c18035 ([email protected]:y-trudeau/resource-agents.git branch > mysql-repl). This patch implements fallback on uname -n if the node > IP attribute is not present and uses the instance name for the > replication info attribute. Hmm, it looks like there was a misunderstanding here. The attribute named "IP" is still named "IP" :) And we're still missing the documentation for that attribute. > I am also working with Raoul to get me > back on track with git. Good! Cheers, Dejan > > Regards, > > Yves > --- mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400 > +++ resource-agents-prm/heartbeat/mysql 2012-05-10 11:01:02.538421042 > -0400 > @@ -109,7 +109,7 @@ > : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}} > : > ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}} > > -: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}} > +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}} > > ####################################################################### > > @@ -328,7 +328,7 @@ > </longdesc> > <shortdesc lang="en">Sets the node attribute that determines > whether a node is usable for clients to read from.</shortdesc> > -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" /> > +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" /> > </parameter> > </parameters> > > @@ -355,11 +355,13 @@ > MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10" > MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user > --password=$OCF_RESKEY_replication_passwd" > MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user > --password=$OCF_RESKEY_test_passwd" > +MYSQL_TOO_MANY_CONN_ERR=1040 > > CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot " > HOSTNAME=`uname -n` > 
CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME " > INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'` > +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name > ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication" > > ####################################################################### > > @@ -461,14 +463,14 @@ > > check_slave() { > # Checks slave status > - local rc new_master_IP > + local rc new_master > > get_slave_info > rc=$? > > if [ $rc -eq 0 ]; then > # Did we receive an error other than max_connections? > - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then > + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" > ]; then > # Whoa. Replication ran into an error. This slave has > # diverged from its master. Make sure this resource > # doesn't restart in place. > @@ -484,7 +486,7 @@ > fi > > # If we got max_connections, let's remove the vip > - if [ $last_errno -eq 1040 ]; then > + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then > set_reader_attr 0 > exit $OCF_SUCCESS > fi > @@ -496,9 +498,9 @@ > ocf_log warn "MySQL Slave IO threads currently not running." 
> > # Sanity check, are we at least on the right master > - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config > --name replication_info -s mysql_replication --query -q | cut -d'|' -f1` > + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1` > > - if [ "$master_host" != "$new_master_IP" ]; then > + if [ "$master_host" != "$new_master" ]; then > # Not pointing to the right master, not good, removing the > VIPs > set_reader_attr 0 > > @@ -570,23 +572,23 @@ > } > > set_master() { > - local new_master_IP master_log_file master_log_pos > + local new_master master_log_file master_log_pos > local master_params > > - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name > replication_info -s mysql_replication --query -q | cut -d'|' -f1` > + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1` > > # Keep replication position > get_slave_info > > - if [ "$master_log_file" -a "$new_master_IP" = "$master_host" ]; then > + if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then > # master_params=", MASTER_LOG_FILE='$master_log_file', \ > # MASTER_LOG_POS=$master_log_pos" > ocf_log info "Kept master pos for $master_host : > $master_log_file:$master_log_pos" > rm -f $tmpfile > return > else > - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config > --name replication_info -s mysql_replication --query -q | cut -d'|' -f2` > - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config > --name replication_info -s mysql_replication --query -q | cut -d'|' -f3` > + master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2` > + master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3` > if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then > master_params=", MASTER_LOG_FILE='$master_log_file', \ > MASTER_LOG_POS=$master_log_pos" > @@ -601,7 +603,7 @@ > # reset with RESET MASTER. 
> > ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ > - -e "CHANGE MASTER TO MASTER_HOST='$new_master_IP', \ > + -e "CHANGE MASTER TO MASTER_HOST='$new_master', \ > MASTER_USER='$OCF_RESKEY_replication_user', \ > MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params" > rm -f $tmpfile > @@ -628,15 +630,15 @@ > while true; do > $MYSQL $MYSQL_OPTIONS_REPL \ > -e 'SHOW PROCESSLIST\G' > $tmpfile > - if grep 'Waiting for master to send event' $tmpfile >/dev/null; then > + if grep -i 'Waiting for master to send event' $tmpfile >/dev/null; > then > ocf_log info "MySQL slave has finished reading master binary log" > break > fi > - if grep 'Reconnecting after a failed master event read' $tmpfile > >/dev/null; then > + if grep -i 'Reconnecting after a failed master event read' $tmpfile > >/dev/null; then > ocf_log info "Master is down, no more binary logs to come" > break > fi > - if grep 'Connecting to master' $tmpfile >/dev/null; then > + if grep -i 'Connecting to master' $tmpfile >/dev/null; then > ocf_log info "Master is down, no more binary logs to come" > break > fi > @@ -660,7 +662,7 @@ > while true; do > $MYSQL $MYSQL_OPTIONS_REPL \ > -e 'SHOW PROCESSLIST\G' > $tmpfile > - if grep 'Has read all relay log' $tmpfile >/dev/null; then > + if grep -i 'Has read all relay log' $tmpfile >/dev/null; then > ocf_log info "MySQL slave has finished processing relay log" > break > fi > @@ -744,8 +746,15 @@ > # Determines what IP address is attached to the current host. The output of > the > # crm_attribute command looks like this: > # scope=nodes name=IP value=10.2.2.161 > +# If the IP node attribute is not defined, fallback is to uname -n > get_local_ip() { > - $CRM_ATTR -l forever -n IP -q -G > + local IP > + IP=`$CRM_ATTR -l forever -n IP -q -G` > + if [ ! $? -eq 0 ]; then > + uname -n > + else > + echo $IP > + fi > } > > ####################################################################### > @@ -783,7 +792,7 @@ > > mysql_status() { > if [ ! 
-e $OCF_RESKEY_pid ]; then > - ocf_log err "MySQL is not running" > + ocf_log $1 "MySQL is not running" > return $OCF_NOT_RUNNING; > fi > > @@ -797,7 +806,7 @@ > if [ $? -eq 0 ]; then > return $OCF_SUCCESS; > else > - ocf_log err "MySQL not running: removing old PID file" > + ocf_log $1 "MySQL not running: removing old PID file" > rm -f $OCF_RESKEY_pid > return $OCF_NOT_RUNNING; > fi > @@ -811,8 +820,9 @@ > if ocf_is_probe; then > status_loglevel="info" > fi > - > + > mysql_status $status_loglevel > + > rc=$? > > # TODO: check max connections error > @@ -856,7 +866,7 @@ > set_reader_attr 0 > fi > > - mysql_status > + mysql_status info > if [ $? = $OCF_SUCCESS ]; then > ocf_log info "MySQL already running" > return $OCF_SUCCESS > @@ -930,7 +940,7 @@ > # Let the CRM/LRM time us out if required. > start_wait=1 > while [ $start_wait = 1 ]; do > - mysql_status > + mysql_status info > rc=$? > if [ $rc = $OCF_SUCCESS ]; then > start_wait=0 > @@ -1019,7 +1029,7 @@ > count=0 > while [ $count -lt $shutdown_timeout ] > do > - mysql_status > + mysql_status info > rc=$? > if [ $rc = $OCF_NOT_RUNNING ]; then > break > @@ -1029,7 +1039,7 @@ > ocf_log debug "MySQL still hasn't stopped yet. Waiting..." > done > > - mysql_status > + mysql_status info > if [ $? != $OCF_NOT_RUNNING ]; then > ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using > SIGTERM. Trying SIGKILL..." > /bin/kill -KILL $pid > /dev/null > @@ -1044,7 +1054,7 @@ > mysql_promote() { > local master_info > > - if ( ! mysql_status ); then > + if ( ! 
mysql_status err ); then > return $OCF_NOT_RUNNING > fi > ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ > @@ -1053,7 +1063,7 @@ > # Set Master Info in CIB, cluster level attribute > update_data_master_status > master_info="$(get_local_ip)|$(get_master_status > File)|$(get_master_status Position)" > - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info > -s mysql_replication -v "$master_info" > + ${CRM_ATTR_REPL_INFO} -v "$master_info" > rm -f $tmpfile > > set_read_only off || return $OCF_ERR_GENERIC > @@ -1070,7 +1080,7 @@ > } > > mysql_demote() { > - if ! mysql_status; then > + if ! mysql_status err; then > return $OCF_NOT_RUNNING > fi > > @@ -1177,6 +1187,7 @@ > # The log directory must be a directory owned by root, with permissions 0700, > # and the log must be writable and not a symlink. > ########################################################################## > +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log" > if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then > DEBUG_LOG_DIR="${DEBUG_LOG%/*}" > if [ -d "${DEBUG_LOG_DIR}" ]; then > @@ -1214,7 +1225,7 @@ > case "$1" in > start) mysql_start;; > stop) mysql_stop;; > - status) mysql_status;; > + status) mysql_status err;; > monitor) mysql_monitor;; > promote) mysql_promote;; > demote) mysql_demote;; > _______________________________________________________ > Linux-HA-Dev: [email protected] > http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev > Home Page: http://linux-ha.org/ _______________________________________________________ Linux-HA-Dev: [email protected] http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev Home Page: http://linux-ha.org/
