Hello all,
I am testing my 2-node cluster with Heartbeat 2.1.2 on SLES 9.
In my tests I removed the two network cables from the production
NICs (bond0), so I expected a failover.
But there wasn't any reaction ;(
When I bring all network interfaces down, the slave takes over the
resources and everything works fine.
bond1 and bond2 are "direct links/crossover" and I can't see any
production IP via these two interfaces.
On bond0 there are two vlan interfaces. Over these interfaces I can
see the production network (see ping_group).
When I remove the link for bond0 (vlan 425/725), the master can't
ping any IP from the ping group.
On the master I get only these log entries:
-----------
heartbeat[18143]: 2007/09/06_16:43:00 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:00 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:00 ERROR: write failure on
ping_group prod_lan.: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:02 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:02 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:02 ERROR: write failure on
ping_group prod_lan.: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:04 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:04 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:04 ERROR: write failure on
ping_group prod_lan.: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:06 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:06 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:06 ERROR: write failure on
ping_group prod_lan.: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:08 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:08 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:08 ERROR: write failure on
ping_group prod_lan.: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:10 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:10 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:10 ERROR: write failure on
ping_group prod_lan.: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:12 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:12 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:12 ERROR: write failure on
ping_group prod_lan.: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:14 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:14 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:14 ERROR: write failure on
ping_group prod_lan.: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:16 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:16 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:16 ERROR: write failure on
ping_group prod_lan.: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:18 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:18 ERROR: glib: Error sending
packet: Network is unreachable
heartbeat[18143]: 2007/09/06_16:43:18 ERROR: write failure on
ping_group prod_lan.: Network is unreachable
heartbeat[18132]: 2007/09/06_16:43:18 WARN: node prod_lan: is dead
heartbeat[18132]: 2007/09/06_16:43:18 info: Link prod_lan:prod_lan
dead.
crmd[18174]: 2007/09/06_16:43:18 notice: crmd_ha_status_callback:
Status update: Node prod_lan now has status [dead]
heartbeat[18132]: 2007/09/06_16:43:18 info: Link nfs00002:vlan725
dead.
heartbeat[18132]: 2007/09/06_16:43:18 info: Link nfs00002:vlan425
dead.
crmd[18174]: 2007/09/06_16:43:18 WARN: get_uuid: Could not calculate
UUID for prod_lan
Something must be wrong in my config...
Any hints?
Ciao
Andre
--------------
# cat /etc/ha.d/ha.cf
keepalive 2
deadtime 15
warntime 10
initdead 25
bcast vlan725 # bond0
bcast vlan425 # bond0
bcast bond1 # direct link 192.168.1.0
bcast bond2 # direct link 192.168.2.0
node nfs00001 nfs00002
ping_group prod_lan 172.18.8.60 10.18.1.126 10.18.1.116
crm yes
use_logd yes
###################################################
# cat cib.xml
<cib admin_epoch="0" generated="false" have_quorum="true"
ignore_dtd="false" num_peers="0" cib_feature_revision="1.3"
epoch="106" num_updates="30" cib-last-written="Thu Sep 6 16:41:03
2007" ccm_transition="1">
<configuration>
<crm_config>
<cluster_property_set id="cib-bootstrap-options">
<attributes>
<nvpair id="cib-bootstrap-options-symmetric-cluster"
name="symmetric-cluster" value="true"/>
<nvpair id="cib-bootstrap-options-no-quorum-policy"
name="no-quorum-policy" value="stop"/>
<nvpair
id="cib-bootstrap-options-default-resource-stickiness"
name="default-resource-stickiness" value="INFINITY"/>
<nvpair
id="cib-bootstrap-options-default-resource-failure-stickiness"
name="default-resource-failure-stickiness" value="0"/>
<nvpair id="cib-bootstrap-options-stonith-enabled"
name="stonith-enabled" value="true"/>
<nvpair id="cib-bootstrap-options-stonith-action"
name="stonith-action" value="reboot"/>
<nvpair id="cib-bootstrap-options-stop-orphan-resources"
name="stop-orphan-resources" value="true"/>
<nvpair id="cib-bootstrap-options-stop-orphan-actions"
name="stop-orphan-actions" value="true"/>
<nvpair id="cib-bootstrap-options-remove-after-stop"
name="remove-after-stop" value="false"/>
<nvpair id="cib-bootstrap-options-short-resource-names"
name="short-resource-names" value="true"/>
<nvpair
id="cib-bootstrap-options-transition-idle-timeout"
name="transition-idle-timeout" value="5min"/>
<nvpair id="cib-bootstrap-options-default-action-timeout"
name="default-action-timeout" value="15s"/>
<nvpair id="cib-bootstrap-options-is-managed-default"
name="is-managed-default" value="true"/>
<nvpair id="cib-bootstrap-options-suppress_cib_writes"
name="suppress_cib_writes" value="false"/>
<nvpair id="cib-bootstrap-options-startup-fencing"
name="startup-fencing" value="true"/>
</attributes>
</cluster_property_set>
</crm_config>
<nodes>
<node uname="nfs00002" type="normal"
id="043eb3f2-485e-4647-9220-5fa850c6a4df">
<instance_attributes
id="nodes-043eb3f2-485e-4647-9220-5fa850c6a4df">
<attributes>
<nvpair name="standby"
id="standby-043eb3f2-485e-4647-9220-5fa850c6a4df" value="false"/>
</attributes>
</instance_attributes>
</node>
<node uname="nfs00001" type="normal"
id="c610a2fc-70db-4d20-90ad-d928685f1bd9">
<instance_attributes
id="nodes-c610a2fc-70db-4d20-90ad-d928685f1bd9">
<attributes>
<nvpair
id="standby-c610a2fc-70db-4d20-90ad-d928685f1bd9" name="standby"
value="false"/>
</attributes>
</instance_attributes>
</node>
</nodes>
<resources>
<clone id="PingHosts">
<instance_attributes id="pingd">
<attributes>
<nvpair id="pingd-clone_max_val" name="clone_max"
value="2"/>
<nvpair id="pingd-clone_node_max" name="clone_node_max"
value="1"/>
</attributes>
</instance_attributes>
<primitive id="pingd-child" provider="heartbeat"
class="ocf" type="pingd">
<operations>
<op id="pingd-child-monitor" name="monitor"
interval="20s" timeout="40s" prereq="nothing"/>
<op id="pingd-child-start" name="start"
prereq="nothing"/>
</operations>
<instance_attributes id="pingd_inst_attr">
<attributes>
<nvpair id="pingd-dampen" name="dampen" value="5s"/>
<nvpair id="pingd-multiplier" name="multiplier"
value="100"/>
<nvpair id="pingd-user" name="user"
value="hacluster"/>
<nvpair id="pingd-pid" name="pidfile"
value="/var/lib/heartbeat/cores/hacluster/pingd.pid"/>
</attributes>
</instance_attributes>
</primitive>
</clone>
<clone id="NodeFencing">
<instance_attributes id="clone_attrs">
<attributes>
<nvpair id="clone_max_val" name="clone_max" value="2"/>
<nvpair id="clone_node_max_val" name="clone_node_max"
value="1"/>
</attributes>
</instance_attributes>
<primitive id="child_NodeFencing" class="stonith"
type="external/CB_STONITH.pl" provider="heartbeat">
<operations>
<op id="child_NodeFencing_mon" name="monitor"
interval="5s" timeout="20s" prereq="nothing"/>
<op id="child_NodeFencing_start" name="start"
timeout="20s" prereq="nothing"/>
</operations>
<instance_attributes id="child_NodeFencing_attrs">
<attributes>
<nvpair id="cb_stonith_hostlist" name="hostlist"
value="nfs00001-nfs00002"/>
<nvpair id="cb_stonith_iloip" name="iloip"
value="10_18_2_32-10_18_2_33"/>
<nvpair id="cb_stonith_passlist" name="passlist"
value="XXXX-XXXX"/>
</attributes>
</instance_attributes>
</primitive>
</clone>
<group id="group_NFS">
<primitive class="ocf" id="VIP01_172_18_8_11"
provider="heartbeat" type="IPaddr2">
<operations>
<op id="VIP01_mon" interval="5s" name="monitor"
timeout="10s"/>
</operations>
<instance_attributes id="VIP01_inst_attr">
<attributes>
<nvpair id="VIP01_attr_0" name="ip"
value="172.18.8.11"/>
<nvpair id="VIP01_attr_1" name="cidr_netmask"
value="26"/>
<nvpair id="VIP01_attr_2" name="broadcast"
value="172.18.8.63"/>
<nvpair id="VIP01_attr_3" name="nic"
value="vlan425"/>
<nvpair id="VIP01_attr_4" name="iflabel"
value="vip01"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" id="VIP02_172_18_8_12"
provider="heartbeat" type="IPaddr2">
<operations>
<op id="VIP02_mon" interval="5s" name="monitor"
timeout="10s"/>
</operations>
<instance_attributes id="VIP02_inst_attr">
<attributes>
<nvpair id="VIP02_attr_0" name="ip"
value="172.18.8.12"/>
<nvpair id="VIP02_attr_1" name="cidr_netmask"
value="26"/>
<nvpair id="VIP02_attr_2" name="broadcast"
value="172.18.8.63"/>
<nvpair id="VIP02_attr_3" name="nic"
value="vlan425"/>
<nvpair id="VIP02_attr_4" name="iflabel"
value="vip02"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" id="VIP03_172_18_8_13"
provider="heartbeat" type="IPaddr2">
<operations>
<op id="VIP03_mon" interval="5s" name="monitor"
timeout="10s"/>
</operations>
<instance_attributes id="VIP03_inst_attr">
<attributes>
<nvpair id="VIP03_attr_0" name="ip"
value="172.18.8.13"/>
<nvpair id="VIP03_attr_1" name="cidr_netmask"
value="26"/>
<nvpair id="VIP03_attr_2" name="broadcast"
value="172.18.8.63"/>
<nvpair id="VIP03_attr_3" name="nic"
value="vlan425"/>
<nvpair id="VIP03_attr_4" name="iflabel"
value="vip03"/>
</attributes>
</instance_attributes>
</primitive>
</group>
</resources>
<constraints>
<rsc_location id="rsc_location_group_NFS" rsc="group_NFS">
<rule id="prefered_location_group_NFS" score="100">
<expression attribute="#uname"
id="prefered_location_group_NFS_expr" operation="eq"
value="nfs00001"/>
</rule>
</rsc_location>
<rsc_location id="pingd:connected" rsc="group_NFS">
<rule id="pingd:connected:rule"
score_attribute="default_ping_set">
<expression id="pingd:connected:expr:defined"
attribute="default_ping_set" operation="defined"/>
</rule>
</rsc_location>
</constraints>
</configuration>
</cib>
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems