Hi all, I'm having a bit of difficulty with the way that my cluster is behaving on failure of a resource.
The objective of my clustering setup is to provide a virtual IP, to which a number of other services are bound. The services are bound to the VIP with constraints that force each service to run on the same host as the VIP. I have been testing the way that the cluster behaves if it is unable to start a resource. What I observe is the following: the cluster tries to start the resource on node 1, fails 10 times, reaches the migration threshold, moves the resource to the other host, fails 10 times, and reaches the migration threshold again. Now it has reached the migration threshold on all possible hosts. I was then expecting that it would stop the resource on all nodes and run all of the other resources as though nothing were wrong. What I see, though, is that the cluster demotes all master/slave resources, despite the fact that only one of them is failing. I wasn't able to find a parameter which would dictate what the behaviour should be if the migration failed on all available hosts. I therefore suspect that the constraints configuration I'm using isn't doing quite what I hope it's doing. Below is the configuration XML I am using on the hosts (no crmsh config, sorry). I am using Corosync 2.3.0 and Pacemaker 1.1.8, built from source.
Regards, James

<!-- Configuration file for pacemaker -->
<!--
  Fragment of the cluster configuration: a <resources> section followed by a
  <constraints> section.

  Every master/slave resource below carries the same meta attributes:
  interleave="true", target-role="Master", failure-timeout="600" and
  migration-threshold="10". Only master-conntrackd sets notify="true".
  The VIP primitive uses its own values (failure-timeout="60",
  migration-threshold="50").
-->
<resources>

  <!-- resource for conntrackd -->
  <master id="master-conntrackd">
    <meta_attributes id="master-conntrackd-meta_attributes">
      <nvpair id="master-conntrackd-meta_attributes-notify" name="notify" value="true"/>
      <nvpair id="master-conntrackd-meta_attributes-interleave" name="interleave" value="true"/>
      <nvpair id="master-conntrackd-meta_attributes-target-role" name="target-role" value="Master"/>
      <!-- id spelling corrected (was "master-conndtrakd-...") to match the sibling ids -->
      <nvpair id="master-conntrackd-meta_attributes-failure-timeout" name="failure-timeout" value="600"/>
      <nvpair id="master-conntrackd-meta_attributes-migration-threshold" name="migration-threshold" value="10"/>
    </meta_attributes>
    <primitive id="conntrackd" class="ocf" provider="OSAG" type="conntrackd">
      <operations>
        <!-- the Slave and Master monitors use different intervals (60 and 61) -->
        <op id="conntrackd-slave-check" name="monitor" interval="60" role="Slave"/>
        <op id="conntrackd-master-check" name="monitor" interval="61" role="Master"/>
      </operations>
    </primitive>
  </master>

  <!-- master/slave resource for the OSAG "condition" agent (no instance attributes set) -->
  <master id="master-condition">
    <meta_attributes id="master-condition-meta_attributes">
      <nvpair id="master-condition-meta_attributes-notify" name="notify" value="false"/>
      <nvpair id="master-condition-meta_attributes-interleave" name="interleave" value="true"/>
      <nvpair id="master-condition-meta_attributes-target-role" name="target-role" value="Master"/>
      <nvpair id="master-condition-meta_attributes-failure-timeout" name="failure-timeout" value="600"/>
      <nvpair id="master-condition-meta_attributes-migration-threshold" name="migration-threshold" value="10"/>
    </meta_attributes>
    <primitive id="condition" class="ocf" provider="OSAG" type="condition">
      <instance_attributes id="condition-attrs"/>
      <operations>
        <op id="condition-slave-check" name="monitor" interval="10" role="Slave"/>
        <op id="condition-master-check" name="monitor" interval="11" role="Master"/>
      </operations>
    </primitive>
  </master>

  <!--
    The next three masters all use the OSAG "osaginit" agent and differ only in
    the "script" instance attribute (ospfd.init, ripd.init, squid.init).
  -->
  <master id="master-ospfd.init">
    <meta_attributes id="master-ospfd-meta_attributes">
      <nvpair id="master-ospfd-meta_attributes-notify" name="notify" value="false"/>
      <nvpair id="master-ospfd-meta_attributes-interleave" name="interleave" value="true"/>
      <nvpair id="master-ospfd-meta_attributes-target-role" name="target-role" value="Master"/>
      <nvpair id="master-ospfd-meta_attributes-failure-timeout" name="failure-timeout" value="600"/>
      <nvpair id="master-ospfd-meta_attributes-migration-threshold" name="migration-threshold" value="10"/>
    </meta_attributes>
    <primitive id="ospfd" class="ocf" provider="OSAG" type="osaginit">
      <instance_attributes id="ospfd-attrs">
        <nvpair id="ospfd-script" name="script" value="ospfd.init"/>
      </instance_attributes>
      <operations>
        <op id="ospfd-slave-check" name="monitor" interval="10" role="Slave"/>
        <op id="ospfd-master-check" name="monitor" interval="11" role="Master"/>
      </operations>
    </primitive>
  </master>

  <master id="master-ripd.init">
    <meta_attributes id="master-ripd-meta_attributes">
      <nvpair id="master-ripd-meta_attributes-notify" name="notify" value="false"/>
      <nvpair id="master-ripd-meta_attributes-interleave" name="interleave" value="true"/>
      <nvpair id="master-ripd-meta_attributes-target-role" name="target-role" value="Master"/>
      <nvpair id="master-ripd-meta_attributes-failure-timeout" name="failure-timeout" value="600"/>
      <nvpair id="master-ripd-meta_attributes-migration-threshold" name="migration-threshold" value="10"/>
    </meta_attributes>
    <primitive id="ripd" class="ocf" provider="OSAG" type="osaginit">
      <instance_attributes id="ripd-attrs">
        <nvpair id="ripd-script" name="script" value="ripd.init"/>
      </instance_attributes>
      <operations>
        <op id="ripd-slave-check" name="monitor" interval="10" role="Slave"/>
        <op id="ripd-master-check" name="monitor" interval="11" role="Master"/>
      </operations>
    </primitive>
  </master>

  <master id="master-squid.init">
    <meta_attributes id="master-squid-meta_attributes">
      <nvpair id="master-squid-meta_attributes-notify" name="notify" value="false"/>
      <nvpair id="master-squid-meta_attributes-interleave" name="interleave" value="true"/>
      <nvpair id="master-squid-meta_attributes-target-role" name="target-role" value="Master"/>
      <nvpair id="master-squid-meta_attributes-failure-timeout" name="failure-timeout" value="600"/>
      <nvpair id="master-squid-meta_attributes-migration-threshold" name="migration-threshold" value="10"/>
    </meta_attributes>
    <primitive id="squid" class="ocf" provider="OSAG" type="osaginit">
      <instance_attributes id="squid-attrs">
        <nvpair id="squid-script" name="script" value="squid.init"/>
      </instance_attributes>
      <operations>
        <op id="squid-slave-check" name="monitor" interval="10" role="Slave"/>
        <op id="squid-master-check" name="monitor" interval="11" role="Master"/>
      </operations>
    </primitive>
  </master>

  <!-- resource for interface checks -->
  <!-- NOTE(review): presumably this agent publishes the "ifcheck" node attribute
       that the VIP location rule below reads; confirm against the ifmonitor agent. -->
  <clone id="clone-IFcheck">
    <primitive id="IFcheck" class="ocf" provider="OSAG" type="ifmonitor">
      <instance_attributes id="resIFcheck-attrs">
        <nvpair id="IFcheck-interfaces" name="interfaces" value="eth0 eth1"/>
        <nvpair id="IFcheck-multiplier" name="multiplier" value="200"/>
        <nvpair id="IFcheck-dampen" name="dampen" value="6s"/>
      </instance_attributes>
      <operations>
        <op id="IFcheck-monitor" interval="3s" name="monitor"/>
      </operations>
    </primitive>
  </clone>

  <!-- resource for ISP checks -->
  <!-- NOTE(review): presumably this agent publishes the "ispcheck" node attribute
       that the VIP location rule below reads; confirm against the ispcheck agent. -->
  <clone id="clone-ISPcheck">
    <primitive id="ISPcheck" class="ocf" provider="OSAG" type="ispcheck">
      <instance_attributes id="ISPcheck-attrs">
        <nvpair id="ISPcheck-ipsec" name="ipsec-check" value="1"/>
        <nvpair id="ISPcheck-ping" name="ping-check" value="1"/>
        <nvpair id="ISPcheck-multiplier" name="multiplier" value="200"/>
        <nvpair id="ISPcheck-dampen" name="dampen" value="60s"/>
      </instance_attributes>
      <operations>
        <op id="ISPcheck-monitor" interval="30s" name="monitor"/>
      </operations>
    </primitive>
  </clone>

  <!-- Virtual IP group: a single IPaddr2 address, 192.168.1.10/24 on eth1 with label 0 -->
  <group id="VIP-group">
    <primitive id="eth1-0-192.168.1.10" class="ocf" provider="heartbeat" type="IPaddr2">
      <meta_attributes id="meta-VIP-1">
        <nvpair id="VIP-1-failure-timeout" name="failure-timeout" value="60"/>
        <nvpair id="VIP-1-migration-threshold" name="migration-threshold" value="50"/>
      </meta_attributes>
      <instance_attributes id="VIP-1-instance_attributes">
        <nvpair id="VIP-1-IP" name="ip" value="192.168.1.10"/>
        <nvpair id="VIP-1-nic" name="nic" value="eth1"/>
        <nvpair id="VIP-1-cidr" name="cidr_netmask" value="24"/>
        <nvpair id="VIP-1-iflabel" name="iflabel" value="0"/>
        <nvpair id="VIP-1-arp-sender" name="arp_sender" value="send_arp"/>
      </instance_attributes>
      <operations>
        <op id="VIP-1-monitor" interval="10s" name="monitor"/>
      </operations>
    </primitive>
  </group>

</resources>

<!-- resource constraints -->
<constraints>

  <!-- set VIP location based on the following two rules -->
  <rsc_location id="VIPs" rsc="VIP-group">
    <!-- prefer host with more interfaces -->
    <rule id="VIP-prefer-connected-rule-1" score-attribute="ifcheck">
      <expression id="VIP-prefer-most-connected-1" attribute="ifcheck" operation="defined"/>
    </rule>
    <!-- prefer host with better ISP connectivity -->
    <rule id="VIP-prefer-connected-rule-2" score-attribute="ispcheck">
      <expression id="VIP-prefer-most-connected-2" attribute="ispcheck" operation="defined"/>
    </rule>
  </rsc_location>

  <!--
    NOTE(review): all five colocations below are mandatory (score="INFINITY").
    Pacemaker documents that, for "A with B", A's node preferences are taken into
    account when choosing a node for B. A master that has hit migration-threshold
    on every node may therefore influence where (or whether) VIP-group is placed,
    and with it every other Master role tied to the VIP. Confirm against the
    colocation section of Pacemaker Explained before relying on this layout to
    isolate a single failing service.
  -->

  <!-- conntrack master must run where the VIPs are -->
  <rsc_colocation id="conntrack-master-with-VIPs" rsc="master-conntrackd" with-rsc="VIP-group" rsc-role="Master" score="INFINITY"/>
  <rsc_colocation id="condition-master-with-VIPs" rsc="master-condition" with-rsc="VIP-group" rsc-role="Master" score="INFINITY"/>

  <!-- services masters must run where the VIPs are -->
  <rsc_colocation id="ospfd-master-with-VIPs" rsc="master-ospfd.init" with-rsc="VIP-group" rsc-role="Master" score="INFINITY"/>
  <rsc_colocation id="ripd-master-with-VIPs" rsc="master-ripd.init" with-rsc="VIP-group" rsc-role="Master" score="INFINITY"/>
  <rsc_colocation id="squid-master-with-VIPs" rsc="master-squid.init" with-rsc="VIP-group" rsc-role="Master" score="INFINITY"/>

  <!-- prefer as master the following hosts in ascending order -->
  <rsc_location id="VIP-master-xi" rsc="VIP-group" node="xi" score="0"/>
  <rsc_location id="VIP-master-nu" rsc="VIP-group" node="nu" score="20"/>
  <rsc_location id="VIP-master-mu" rsc="VIP-group" node="mu" score="40"/>

</constraints>

_______________________________________________
Pacemaker mailing list: Pacemaker@oss.clusterlabs.org
http://oss.clusterlabs.org/mailman/listinfo/pacemaker

Project Home: http://www.clusterlabs.org
Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
Bugs: http://bugs.clusterlabs.org