Greetings, all -- If anyone's interested, I wrote a resource agent that works with Microsoft Azure. I'm no expert at shell scripting, so I'm certain it needs a great deal of improvement, but I've done some testing and it works with a 2-node cluster in my Azure environment. Offhand, I don't know any reason why it wouldn't work with larger clusters, too.
# My colocation stack looks like this:
#
#   mysql -> azure_ip -> cluster_ip -> filesystem -> drbd
#
# Failover takes up to 4 minutes because it takes that long for the Azure IP
# address de-association and re-association to complete. None of the delay is
# the fault of the cluster itself.
#
# Right now the script burps a bunch of debug output to syslog. That is helpful
# if you feel like you're waiting forever for the cluster to fail over: you can
# look at /var/log/messages and see that you're waiting for the Azure cloud to
# finish something. To eliminate the debug messages, set DEBUG_LEVEL to 0.
#
# The agent requires the Azure client to be installed and the nodes to have
# been logged into the cloud. It currently only works with one NIC per VM, and
# two ipconfigs per NIC (one of which is the floating cluster IP).
#
# This is obviously beta as it currently only works with a manual failover. I
# need to add some code to handle an actual node crash or power-plug test.
#
# Feedback, suggestions, improvements are welcome. If someone who knows awk
# wants to clean up my azure client calls, that would be a good place to start.
#
# --
#!/bin/bash
#
# AZaddr2: OCF resource agent that creates/deletes an Azure NIC ip-config
# carrying a floating private IP address on the local VM's NIC.
#
# OCF parameters are as below
#   OCF_RESKEY_ip
#
# bash (not plain sh) is required: logIt uses substring expansion and
# azip_validate uses [[ =~ ]].

#######################################################################
# Initialization:

: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"

# 0 = no debug output, 1 = flow tracing, 2 = also dump raw az output.
DEBUG_LEVEL=2
MY_HOSTNAME=$(hostname -s)
SCRIPT_NAME=$(basename "$0")

#######################################################################

#######################################
# Write a message to syslog via logger(1).
# A message starting with "debug<N>" (N a single digit) is emitted only when
# N <= DEBUG_LEVEL; levels above 1 are framed by banner lines. Any other
# message is always emitted.
# Globals:   DEBUG_LEVEL, SCRIPT_NAME, OCF_RESOURCE_INSTANCE (read)
# Arguments: $1 - message text
# Returns:   0 when the message is filtered out, else logger's status
#######################################
logIt() {
  local msg=$1
  local tag="$SCRIPT_NAME ($OCF_RESOURCE_INSTANCE)"
  local banner="####################################################################"
  local level

  case $msg in
    debug[0-9]*)
      level=${msg:5:1}
      if [ "$level" -gt "$DEBUG_LEVEL" ]; then
        return 0
      fi
      if [ "$level" -gt 1 ]; then
        logger "$tag: debug$level $banner"
        logger "$tag: $msg"
        logger "$tag: debug$level $banner"
      else
        logger "$tag: $msg"
      fi
      ;;
    *)
      logger "$tag: $msg"
      ;;
  esac
}

#######################################
# Print the resource-agent metadata XML on stdout.
# Returns: OCF_SUCCESS
#######################################
meta_data() {
  logIt "debug1: entered: meta_data()"
  # NOTE(review): the advertised 20s start/stop timeouts are far shorter than
  # the "up to 4 minutes" Azure needs to move the address (see notes above);
  # operators presumably override them in the cluster configuration.
  cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="AZaddr2">
<version>1.0</version>

<longdesc lang="en">
Resource agent for managing IP configs in Azure.
</longdesc>
<shortdesc lang="en">Manages a floating IP config on an Azure NIC</shortdesc>

<parameters>
<parameter name="ip" unique="1" required="1">
<longdesc lang="en">
The IPv4 (dotted quad notation)
example IPv4 "192.168.1.1".
</longdesc>
<shortdesc lang="en">IPv4 address</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>

<actions>
<action name="start"        timeout="20s" />
<action name="stop"         timeout="20s" />
<action name="status"       depth="0" timeout="20s" interval="10s" />
<action name="monitor"      depth="0" timeout="20s" interval="10s" />
<action name="meta-data"    timeout="5s" />
<action name="validate-all" timeout="20s" />
</actions>
</resource-agent>
END
  logIt "debug1: exiting: meta_data()"
  return $OCF_SUCCESS
}

#######################################
# Print a usage summary on stdout.
# Returns: OCF_SUCCESS
#######################################
azip_usage() {
  cat <<END
usage: $0 {start|stop|status|monitor|validate-all|meta-data}

Expects to have a fully populated OCF RA-compliant environment set.
END
  return $OCF_SUCCESS
}

#######################################
# Discover the Azure context this agent operates in.
# Globals written: AZ_ENABLED, AZ_IPCONFIG_NAME, AZ_RG_NAME, AZ_NIC_NAME,
#                  AZ_VNET_NAME, AZ_SUBNET_NAME
# Globals read:    OCF_RESKEY_ip, MY_HOSTNAME
# Returns: OCF_SUCCESS, OCF_ERR_INSTALLED when az is missing, or
#          OCF_ERR_GENERIC when a value cannot be determined
#######################################
azure_init() {
  local nic_id subnet_id vnet_path

  logIt "debug1: entered: azure_init()"

  if ! command -v az >/dev/null 2>&1; then
    logIt "Azure client 'az' not found in PATH"
    return $OCF_ERR_INSTALLED
  fi

  #--check if azure is enabled
  AZ_ENABLED=$(az account show --query state --output tsv)
  if [ "$AZ_ENABLED" != "Enabled" ]; then
    logIt "Azure account not detected"
    return $OCF_ERR_GENERIC
  fi
  logIt "debug1: AZ_ENABLED=$AZ_ENABLED"

  #--set the ipconfig name
  AZ_IPCONFIG_NAME="ipconfig-$OCF_RESKEY_ip"
  logIt "debug1: AZ_IPCONFIG_NAME=$AZ_IPCONFIG_NAME"

  #--get the resource group name
  AZ_RG_NAME=$(az group list --query "[].name" --output tsv)
  if [ -z "$AZ_RG_NAME" ]; then
    logIt "could not determine the Azure resource group name"
    return $OCF_ERR_GENERIC
  fi
  # tsv prints one group per line; more than one line means we cannot tell
  # which group this VM belongs to.
  if [[ $AZ_RG_NAME == *$'\n'* ]]; then
    logIt "found more than one Azure resource group; cannot choose one"
    return $OCF_ERR_GENERIC
  fi
  logIt "debug1: AZ_RG_NAME=$AZ_RG_NAME"

  #--get the nic name (last path component of the first NIC's resource id)
  nic_id=$(az vm nic list -g "$AZ_RG_NAME" --vm-name "$MY_HOSTNAME" \
    --query "[0].id" --output tsv)
  AZ_NIC_NAME=${nic_id##*/}
  if [ -z "$AZ_NIC_NAME" ]; then
    logIt "could not determine the Azure NIC name"
    return $OCF_ERR_GENERIC
  fi
  logIt "debug1: AZ_NIC_NAME=$AZ_NIC_NAME"

  #--get the vnet and subnet names from the first ipconfig's subnet id:
  #  .../virtualNetworks/<vnet>/subnets/<subnet>
  subnet_id=$(az network nic show --name "$AZ_NIC_NAME" \
    --resource-group "$AZ_RG_NAME" \
    --query "ipConfigurations[0].subnet.id" --output tsv)
  AZ_VNET_NAME=
  AZ_SUBNET_NAME=
  case $subnet_id in
    */virtualNetworks/*/subnets/*)
      AZ_SUBNET_NAME=${subnet_id##*/}
      vnet_path=${subnet_id%/subnets/*}
      AZ_VNET_NAME=${vnet_path##*/}
      ;;
  esac
  if [ -z "$AZ_VNET_NAME" ]; then
    logIt "could not determine Azure vnet name"
    return $OCF_ERR_GENERIC
  fi
  logIt "debug1: AZ_VNET_NAME=$AZ_VNET_NAME"
  if [ -z "$AZ_SUBNET_NAME" ]; then
    logIt "could not determine the Azure subnet name"
    return $OCF_ERR_GENERIC
  fi
  logIt "debug1: AZ_SUBNET_NAME=$AZ_SUBNET_NAME"

  logIt "debug1: exiting: azure_init()"
  return $OCF_SUCCESS
}

#######################################
# Validate the resource parameters and the Azure environment.
# Returns: OCF_SUCCESS, OCF_ERR_CONFIGURED for a bad/missing ip parameter,
#          or whatever azure_init returns
#######################################
azip_validate() {
  if ! [[ $OCF_RESKEY_ip =~ ^([0-9]{1,3}\.){3}[0-9]{1,3}$ ]]; then
    logIt "parameter 'ip' is missing or is not a dotted-quad IPv4 address: '$OCF_RESKEY_ip'"
    return $OCF_ERR_CONFIGURED
  fi
  azure_init
}

#######################################
# Check whether the ip-config for OCF_RESKEY_ip exists on this VM's NIC.
# Classification is by text in the az output: "does not exist" means absent,
# "Succeeded" means present; anything else is reported as an error.
# Returns: OCF_SUCCESS, OCF_NOT_RUNNING or OCF_ERR_GENERIC
#######################################
azip_query() {
  local out rc

  logIt "debug1: entered: azip_query()"
  logIt "debug1: checking to determine if an Azure ipconfig named '$AZ_IPCONFIG_NAME' exists for the interface"
  logIt "debug1: executing: az network nic ip-config show --name $AZ_IPCONFIG_NAME --nic-name $AZ_NIC_NAME -g $AZ_RG_NAME 2>&1"
  out=$(az network nic ip-config show --name "$AZ_IPCONFIG_NAME" \
    --nic-name "$AZ_NIC_NAME" -g "$AZ_RG_NAME" 2>&1)
  logIt "debug2: $out"

  if printf '%s\n' "$out" | grep -q "does not exist"; then
    logIt "debug1: ipconfig named '$AZ_IPCONFIG_NAME' does not exist"
    rc=$OCF_NOT_RUNNING
  elif printf '%s\n' "$out" | grep -q "Succeeded"; then
    logIt "debug1: ipconfig '$AZ_IPCONFIG_NAME' exists"
    rc=$OCF_SUCCESS
  else
    logIt "debug1: not sure how this happens"
    rc=$OCF_ERR_GENERIC
  fi

  logIt "debug1: exiting: azip_query()"
  return $rc
}

#######################################
# Create the ip-config for OCF_RESKEY_ip unless it already exists.
# Returns: OCF_SUCCESS or OCF_ERR_GENERIC
#######################################
azip_start() {
  local out rc

  logIt "debug1: entered: azip_start()"

  #--if a matching ipconfig already exists in Azure, return success
  if azip_query; then
    logIt "debug1: $OCF_RESKEY_ip is already associated"
    rc=$OCF_SUCCESS
  else
    #--create an interface ipconfig in Azure
    logIt "debug1: creating ipconfig '$AZ_IPCONFIG_NAME'"
    logIt "debug1: executing: az network nic ip-config create --name $AZ_IPCONFIG_NAME --nic-name $AZ_NIC_NAME --resource-group $AZ_RG_NAME --private-ip-address $OCF_RESKEY_ip --subnet $AZ_SUBNET_NAME --vnet-name $AZ_VNET_NAME"
    out=$(az network nic ip-config create --name "$AZ_IPCONFIG_NAME" \
      --nic-name "$AZ_NIC_NAME" --resource-group "$AZ_RG_NAME" \
      --private-ip-address "$OCF_RESKEY_ip" \
      --subnet "$AZ_SUBNET_NAME" --vnet-name "$AZ_VNET_NAME" 2>&1)
    logIt "debug2: output: $out"
    if printf '%s\n' "$out" | grep -q "Succeeded"; then
      logIt "debug1: $AZ_IPCONFIG_NAME created successfully with ip address $OCF_RESKEY_ip"
      rc=$OCF_SUCCESS
    else
      logIt "debug1: failed to create ipconfig $AZ_IPCONFIG_NAME"
      rc=$OCF_ERR_GENERIC
    fi
  fi

  logIt "debug1: exiting: azip_start()"
  return $rc
}

#######################################
# Delete the ip-config for OCF_RESKEY_ip and verify that it is gone.
# Only a definite "does not exist" answer counts as stopped; an unclear query
# result must not be reported as a successful stop, because the cluster would
# then start the address elsewhere while it may still be attached here.
# Returns: OCF_SUCCESS or OCF_ERR_GENERIC
#######################################
azip_stop() {
  local out rc

  logIt "debug1: entered: azip_stop()"

  #--if there is no matching ipconfig in Azure, exit with success
  azip_query
  rc=$?
  if [ "$rc" -eq "$OCF_NOT_RUNNING" ]; then
    logIt "debug1: $OCF_RESKEY_ip is not associated"
    rc=$OCF_SUCCESS
  else
    if [ "$rc" -ne "$OCF_SUCCESS" ]; then
      logIt "state of ipconfig '$AZ_IPCONFIG_NAME' is unclear; attempting to delete it anyway"
    fi

    #--delete it
    logIt "deleting ipconfig '$AZ_IPCONFIG_NAME'"
    out=$(az network nic ip-config delete --name "$AZ_IPCONFIG_NAME" \
      --nic-name "$AZ_NIC_NAME" --resource-group "$AZ_RG_NAME" 2>&1)
    logIt "debug2: output: $out"

    #--verify that it was deleted
    logIt "verifying that ipconfig '$AZ_IPCONFIG_NAME' got removed"
    azip_query
    if [ $? -eq "$OCF_NOT_RUNNING" ]; then
      logIt "debug1: ipconfig $AZ_IPCONFIG_NAME successfully removed"
      rc=$OCF_SUCCESS
    else
      logIt "failed to remove $AZ_IPCONFIG_NAME."
      rc=$OCF_ERR_GENERIC
    fi
  fi

  logIt "debug1: exiting: azip_stop()"
  return $rc
}

#######################################
# Report whether the ip-config is present.
# Returns: OCF_SUCCESS when azip_query succeeds, otherwise OCF_NOT_RUNNING
#######################################
azip_monitor() {
  local query_rc rc

  logIt "debug1: entered: azip_monitor()"
  azip_query
  query_rc=$?
  logIt "debug1: function azip_query() returned $query_rc"
  if [ "$query_rc" -eq 0 ]; then
    rc=$OCF_SUCCESS
  else
    rc=$OCF_NOT_RUNNING
  fi
  logIt "debug1: exiting: azip_monitor()"
  return $rc
}

logIt "debug1: agent was asked to $__OCF_ACTION resource $OCF_RESOURCE_INSTANCE with IP address $OCF_RESKEY_ip"

######################################################################
# Actions
######################################################################
# meta-data, usage and unknown actions are answered without touching Azure so
# they work on a node where az is not installed or not logged in.
case "$__OCF_ACTION" in
  meta-data)
    meta_data
    RC=$?
    ;;
  usage|help)
    azip_usage
    RC=$?
    ;;
  start|stop|status|monitor|validate-all)
    azip_validate
    RC=$?
    if [ "$RC" -eq 0 ]; then
      case "$__OCF_ACTION" in
        start)
          azip_start
          RC=$?
          ;;
        stop)
          azip_stop
          RC=$?
          ;;
        status)
          azip_query
          RC=$?
          ;;
        monitor)
          azip_monitor
          RC=$?
          ;;
        validate-all)
          # RC already holds the validation result.
          ;;
      esac
    fi
    ;;
  *)
    azip_usage
    RC=$OCF_ERR_UNIMPLEMENTED
    ;;
esac

#--exit with return code
logIt "debug1: exiting $SCRIPT_NAME with code $RC"
exit $RC

#--end
# -- Eric Robinson
_______________________________________________ Users mailing list: Users@clusterlabs.org http://lists.clusterlabs.org/mailman/listinfo/users Project Home: http://www.clusterlabs.org Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf Bugs: http://bugs.clusterlabs.org