Hi Dejan,
Le 2012-05-08 11:15, Dejan Muhamedagic a écrit :
> Hi Yves,
>
> On Fri, May 04, 2012 at 04:29:34PM -0400, Yves Trudeau wrote:
>> Hi Dejan,
>> here's another modified patch for the mysql agent of the commit
>> version 4c18035 ([email protected]:y-trudeau/resource-agents.git branch
>> mysql-repl). Following a comment of Keisuke, I put back the log level
>> for mysql_status in probe mode.
>
> Cool!
>
> So, according to the discussion at github, you insist that the
> replication IP needs to be specified in the node's static
> attributes. I'm not very happy with it, because it sets a
> precedent, but I won't insist on it. Let's hope for the best.
As we discussed today, I'll add a fallback to 'uname -n' if the IP
attribute is not present for the node.
>
> There are a few concerns which went unanswered so far:
>
> 1a. In case this attribute is not set, would replication fail
> properly (with an informative error message) or misbehave? It is
> not explicitly checked for existence in the code.
>
> 1b. Mori-san suggested making this attribute optional and, in
> case it doesn't exist, just using uname. That sounds like a good
> idea to me.
>
Agreed.
> 2. Is it possible/plausible to have more than one mysql
> instance? If so, then the attribute name should include the
> instance name. Say ${INSTANCE_NAME}_mysql_replication_IP or
> something to that extent. Also, it would make for a better
> looking configuration. "IP" doesn't really say much.
Yes, I just added that code for a customer. I'll use
${INSTANCE_NAME}_replication_info.
>
> 3. This attribute is part of the configuration and is supposed to
> be set up by the user. Please document that in the meta-data.
>
> Cheers,
>
> Dejan
>
> P.S. Any chance of finishing this by Friday?
This Friday, yes, it should be possible.
Regards,
Yves
>
>> Regards,
>>
>> Yves
>
>> --- ../../mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
>> +++ mysql 2012-05-04 15:58:50.318419875 -0400
>> @@ -79,6 +79,7 @@
>> OCF_RESKEY_max_slave_lag_default="3600"
>> OCF_RESKEY_evict_outdated_slaves_default="false"
>> OCF_RESKEY_reader_attribute_default="readable"
>> +OCF_RESKEY_replication_info_attribute_default="replication_info"
>>
>> : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
>> MYSQL_BINDIR=`dirname ${OCF_RESKEY_binary}`
>> @@ -109,7 +110,8 @@
>> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
>> :
>> ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>>
>> -:
>> ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
>> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
>> +:
>> ${OCF_RESKEY_replication_info_attribute=${OCF_RESKEY_replication_info_attribute_default}}
>>
>> #######################################################################
>>
>> @@ -328,7 +330,19 @@
>> </longdesc>
>> <shortdesc lang="en">Sets the node attribute that determines
>> whether a node is usable for clients to read from.</shortdesc>
>> -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
>> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
>> +</parameter>
>> +
>> +<parameter name="replication_info_attribute" unique="1" required="0">
>> +<longdesc lang="en">
>> +An attribute that stores the current master IP, replication file and
>> position.
>> +This is queried by the agent in the post-promote notification
>> +to reconnect the slaves to the new master.
>> +
>> +This parameter is only meaningful in master/slave set configurations.
>> +</longdesc>
>> +<shortdesc lang="en">Cluster attribute storing replication
>> information</shortdesc>
>> +<content type="string"
>> default="${OCF_RESKEY_replication_info_attribute_default}" />
>> </parameter>
>> </parameters>
>>
>> @@ -355,10 +369,12 @@
>> MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
>> MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL
>> --user=$OCF_RESKEY_replication_user
>> --password=$OCF_RESKEY_replication_passwd"
>> MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user
>> --password=$OCF_RESKEY_test_passwd"
>> +MYSQL_TOO_MANY_CONN_ERR=1040
>>
>> CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
>> HOSTNAME=`uname -n`
>> CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
>> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name
>> ${OCF_RESKEY_replication_info_attribute} -s mysql_replication --query -q"
>> INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
>>
>> #######################################################################
>> @@ -468,7 +484,7 @@
>>
>> if [ $rc -eq 0 ]; then
>> # Did we receive an error other than max_connections?
>> - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
>> + if [ $last_errno -ne 0 -a $last_errno -ne
>> "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>> # Whoa. Replication ran into an error. This slave has
>> # diverged from its master. Make sure this resource
>> # doesn't restart in place.
>> @@ -484,7 +500,7 @@
>> fi
>>
>> # If we got max_connections, let's remove the vip
>> - if [ $last_errno -eq 1040 ]; then
>> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>> set_reader_attr 0
>> exit $OCF_SUCCESS
>> fi
>> @@ -496,7 +512,7 @@
>> ocf_log warn "MySQL Slave IO threads currently not running."
>>
>> # Sanity check, are we at least on the right master
>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config
>> --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>> + new_master_IP=`$CRM_ATTR_REPL_INFO | cut -d'|' -f1`
>>
>> if [ "$master_host" != "$new_master_IP" ]; then
>> # Not pointing to the right master, not good, removing the
>> VIPs
>> @@ -573,7 +589,7 @@
>> local new_master_IP master_log_file master_log_pos
>> local master_params
>>
>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name
>> replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>> + new_master_IP=`$CRM_ATTR_REPL_INFO | cut -d'|' -f1`
>>
>> # Keep replication position
>> get_slave_info
>> @@ -585,8 +601,8 @@
>> rm -f $tmpfile
>> return
>> else
>> - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config
>> --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
>> - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config
>> --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
>> + master_log_file=`$CRM_ATTR_REPL_INFO | cut -d'|' -f2`
>> + master_log_pos=`$CRM_ATTR_REPL_INFO | cut -d'|' -f3`
>> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
>> master_params=", MASTER_LOG_FILE='$master_log_file', \
>> MASTER_LOG_POS=$master_log_pos"
>> @@ -660,7 +676,7 @@
>> while true; do
>> $MYSQL $MYSQL_OPTIONS_REPL \
>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>> - if grep 'Has read all relay log' $tmpfile>/dev/null; then
>> + if grep -i 'Has read all relay log' $tmpfile>/dev/null; then
>> ocf_log info "MySQL slave has finished processing relay log"
>> break
>> fi
>> @@ -783,7 +799,7 @@
>>
>> mysql_status() {
>> if [ ! -e $OCF_RESKEY_pid ]; then
>> - ocf_log err "MySQL is not running"
>> + ocf_log $1 "MySQL is not running"
>> return $OCF_NOT_RUNNING;
>> fi
>>
>> @@ -797,7 +813,7 @@
>> if [ $? -eq 0 ]; then
>> return $OCF_SUCCESS;
>> else
>> - ocf_log err "MySQL not running: removing old PID file"
>> + ocf_log $1 "MySQL not running: removing old PID file"
>> rm -f $OCF_RESKEY_pid
>> return $OCF_NOT_RUNNING;
>> fi
>> @@ -811,8 +827,9 @@
>> if ocf_is_probe; then
>> status_loglevel="info"
>> fi
>> -
>> +
>> mysql_status $status_loglevel
>> +
>> rc=$?
>>
>> # TODO: check max connections error
>> @@ -856,7 +873,7 @@
>> set_reader_attr 0
>> fi
>>
>> - mysql_status
>> + mysql_status info
>> if [ $? = $OCF_SUCCESS ]; then
>> ocf_log info "MySQL already running"
>> return $OCF_SUCCESS
>> @@ -930,7 +947,7 @@
>> # Let the CRM/LRM time us out if required.
>> start_wait=1
>> while [ $start_wait = 1 ]; do
>> - mysql_status
>> + mysql_status info
>> rc=$?
>> if [ $rc = $OCF_SUCCESS ]; then
>> start_wait=0
>> @@ -1019,7 +1036,7 @@
>> count=0
>> while [ $count -lt $shutdown_timeout ]
>> do
>> - mysql_status
>> + mysql_status info
>> rc=$?
>> if [ $rc = $OCF_NOT_RUNNING ]; then
>> break
>> @@ -1029,7 +1046,7 @@
>> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
>> done
>>
>> - mysql_status
>> + mysql_status info
>> if [ $? != $OCF_NOT_RUNNING ]; then
>> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s
>> using SIGTERM. Trying SIGKILL..."
>> /bin/kill -KILL $pid> /dev/null
>> @@ -1044,7 +1061,7 @@
>> mysql_promote() {
>> local master_info
>>
>> - if ( ! mysql_status ); then
>> + if ( ! mysql_status err ); then
>> return $OCF_NOT_RUNNING
>> fi
>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>> @@ -1053,7 +1070,7 @@
>> # Set Master Info in CIB, cluster level attribute
>> update_data_master_status
>> master_info="$(get_local_ip)|$(get_master_status
>> File)|$(get_master_status Position)"
>> - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info
>> -s mysql_replication -v "$master_info"
>> + ${HA_SBIN_DIR}/crm_attribute --type crm_config --name
>> ${OCF_RESKEY_replication_info_attribute} -s mysql_replication -v
>> "$master_info"
>> rm -f $tmpfile
>>
>> set_read_only off || return $OCF_ERR_GENERIC
>> @@ -1070,7 +1087,7 @@
>> }
>>
>> mysql_demote() {
>> - if ! mysql_status; then
>> + if ! mysql_status err; then
>> return $OCF_NOT_RUNNING
>> fi
>>
>> @@ -1177,6 +1194,7 @@
>> # The log directory must be a directory owned by root, with permissions
>> 0700,
>> # and the log must be writable and not a symlink.
>> ##########################################################################
>> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
>> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
>> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
>> if [ -d "${DEBUG_LOG_DIR}" ]; then
>> @@ -1214,7 +1232,7 @@
>> case "$1" in
>> start) mysql_start;;
>> stop) mysql_stop;;
>> - status) mysql_status;;
>> + status) mysql_status err;;
>> monitor) mysql_monitor;;
>> promote) mysql_promote;;
>> demote) mysql_demote;;
>
>> _______________________________________________________
>> Linux-HA-Dev: [email protected]
>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>> Home Page: http://linux-ha.org/
>
> _______________________________________________________
> Linux-HA-Dev: [email protected]
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/
>
_______________________________________________________
Linux-HA-Dev: [email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/