Hallo!
Today we are testing disaster scenarios with our Heartbeat/DRBD cluster.
Our first test case was to cut all power to node A without a clean shutdown.
Heartbeat sees the resource offline and tries to do a STONITH to make sure
it's really dead. Because the iLO interface is also without power, the STONITH
fails. Heartbeat then hangs and no services get started on the remaining
node. Is there any solution for this behaviour?
Packages: (SLES10 SP2, standard packages)
heartbeat-ldirectord-2.1.3-0.9
heartbeat-2.1.3-0.9
heartbeat-cmpi-2.1.3-0.9
heartbeat-stonith-2.1.3-0.9
heartbeat-pils-2.1.3-0.9
yast2-heartbeat-2.13.13-0.3
drbd-kmp-bigsmp-0.7.22_2.6.16.60_0.21-42.16
drbd-0.7.22-42.16
I am attaching the current CIB to this message.
Any help is much appreciated,
Christian
<cib generated="true" admin_epoch="0" have_quorum="true" ignore_dtd="false" num_peers="1" cib_feature_revision="2.0" crm_feature_set="2.0" epoch="299" num_updates="1" cib-last-written="Wed Aug 27 11:59:35 2008" ccm_transition="1" dc_uuid="9cf297ca-957a-4c80-abd0-41bbc60efbee">
<configuration>
<!-- Cluster-wide options: STONITH is enabled, default resource stickiness is 100
     and default resource failure stickiness is -100. -->
<crm_config>
  <cluster_property_set id="cib-bootstrap-options">
    <attributes>
      <nvpair id="cib-bootstrap-options-dc-version"
              name="dc-version"
              value="2.1.3-node: a3184d5240c6e7032aef9cce6e5b7752ded544b3"/>
      <nvpair id="cib-bootstrap-options-stonith-enabled"
              name="stonith-enabled"
              value="true"/>
      <nvpair id="cib-bootstrap-options-default-resource-stickiness"
              name="default-resource-stickiness"
              value="100"/>
      <nvpair id="cib-bootstrap-options-default-resource-failure-stickiness"
              name="default-resource-failure-stickiness"
              value="-100"/>
    </attributes>
  </cluster_property_set>
</crm_config>
<!-- The two cluster members, node1 and node2, both of type "normal". -->
<nodes>
  <node id="9cf297ca-957a-4c80-abd0-41bbc60efbee" uname="node1" type="normal"/>
  <node id="0384caf5-33f8-4332-8264-593ee5a818cc" uname="node2" type="normal"/>
</nodes>
<resources>
<!-- Base group: the drbddisk resource, the filesystem on /dev/drbd0, and two
     service IP addresses. The group is ordered and collocated, and its
     target_role is "started". -->
<group id="group_basis" ordered="true" collocated="true">
  <!-- heartbeat-class drbddisk agent; its parameter named "1" is set to r0. -->
  <primitive id="drbddisk_resource" class="heartbeat" provider="heartbeat" type="drbddisk">
    <operations>
      <op id="drbddisk_mon" name="monitor" interval="120s" timeout="60s"/>
      <op id="drbddisk_start" name="start" start_delay="60s" timeout="15s"/>
    </operations>
    <instance_attributes id="drbddisk_inst_attr">
      <attributes>
        <nvpair id="drbddisk_attr_1" name="1" value="r0"/>
      </attributes>
    </instance_attributes>
  </primitive>
  <!-- ext3 filesystem from /dev/drbd0 on /data with default options. -->
  <primitive id="filesystem_resource" class="ocf" provider="heartbeat" type="Filesystem">
    <operations>
      <op id="filesystem_mon" name="monitor" interval="120s" timeout="60s"/>
    </operations>
    <instance_attributes id="filesystem_inst_attr">
      <attributes>
        <nvpair id="filesystem_attr_0" name="device" value="/dev/drbd0"/>
        <nvpair id="filesystem_attr_1" name="directory" value="/data"/>
        <nvpair id="filesystem_attr_2" name="fstype" value="ext3"/>
        <nvpair id="filesystem_attr_3" name="options" value="defaults"/>
      </attributes>
    </instance_attributes>
  </primitive>
  <!-- Service address 192.168.80.150/255.255.255.0 on eth0, monitored every 5s. -->
  <primitive id="IPaddr_192_168_80_150" class="ocf" provider="heartbeat" type="IPaddr">
    <operations>
      <op id="IPaddr_192_168_80_150_mon" name="monitor" interval="5s" timeout="5s"/>
    </operations>
    <instance_attributes id="IPaddr_192_168_80_150_inst_attr">
      <attributes>
        <nvpair id="IPaddr_192_168_80_150_attr_0" name="ip" value="192.168.80.150"/>
        <nvpair id="IPaddr_192_168_80_150_attr_1" name="netmask" value="255.255.255.0"/>
        <nvpair id="IPaddr_192_168_80_150_attr_2" name="nic" value="eth0"/>
      </attributes>
    </instance_attributes>
  </primitive>
  <!-- Service address 192.168.74.150/255.255.255.0 on eth3, monitored every 5s. -->
  <primitive id="IPaddr_192_168_74_150" class="ocf" provider="heartbeat" type="IPaddr">
    <operations>
      <op id="IPaddr_192_168_74_150_mon" name="monitor" interval="5s" timeout="5s"/>
    </operations>
    <instance_attributes id="IPaddr_192_168_74_150_inst_attr">
      <attributes>
        <nvpair id="IPaddr_192_168_74_150_attr_0" name="ip" value="192.168.74.150"/>
        <nvpair id="IPaddr_192_168_74_150_attr_1" name="netmask" value="255.255.255.0"/>
        <nvpair id="IPaddr_192_168_74_150_attr_2" name="nic" value="eth3"/>
      </attributes>
    </instance_attributes>
  </primitive>
  <!-- Group-level attributes; note this set reuses the group's own id. -->
  <instance_attributes id="group_basis">
    <attributes>
      <nvpair id="group_basis-target_role" name="target_role" value="started"/>
    </attributes>
  </instance_attributes>
</group>
<!-- Application group 1: the LSB services mysql and tomcat-1, followed by the
     OCF apache agent running from /data/apache/default. Ordered and collocated;
     target_role is "started". -->
<group id="group_soft1" ordered="true" collocated="true">
  <primitive id="mysql_resource" class="lsb" provider="heartbeat" type="mysql">
    <operations>
      <op id="mysql_mon" name="monitor" interval="60s" timeout="30s"/>
    </operations>
  </primitive>
  <primitive id="tomcat-1_resource" class="lsb" provider="heartbeat" type="tomcat-1">
    <operations>
      <op id="tomcat-1_mon" name="monitor" interval="60s" timeout="30s"/>
    </operations>
  </primitive>
  <primitive id="httpd2_resource" class="ocf" provider="heartbeat" type="apache">
    <operations>
      <op id="httpd2_mon" name="monitor" interval="60s" timeout="30s" start_delay="30s"/>
    </operations>
    <instance_attributes id="httpd2_inst_attr">
      <attributes>
        <nvpair id="httpd2_attr_0"
                name="configfile"
                value="/data/apache/default/conf/httpd.conf"/>
        <nvpair id="httpd2_attr_1"
                name="httpd"
                value="/data/apache/default/bin/httpd"/>
        <nvpair id="httpd2_attr_2"
                name="statusurl"
                value="http://localhost/server-status"/>
      </attributes>
    </instance_attributes>
  </primitive>
  <!-- Group-level attributes; note this set reuses the group's own id. -->
  <instance_attributes id="group_soft1">
    <attributes>
      <nvpair id="group_soft1-target_role" name="target_role" value="started"/>
    </attributes>
  </instance_attributes>
</group>
<!-- Application group 2: a single LSB service, soft2-1, monitored every 60s.
     Ordered and collocated; target_role is "started". -->
<group id="group_soft2" ordered="true" collocated="true">
  <primitive id="soft2-1_resource" class="lsb" provider="heartbeat" type="soft2-1">
    <operations>
      <op id="soft2-1_mon" name="monitor" interval="60s" timeout="30s"/>
    </operations>
  </primitive>
  <!-- Group-level attributes; note this set reuses the group's own id. -->
  <instance_attributes id="group_soft2">
    <attributes>
      <nvpair id="group_soft2-target_role" name="target_role" value="started"/>
    </attributes>
  </instance_attributes>
</group>
<!-- STONITH device for node1: an external/riloe primitive pointed at the iLO
     at 192.168.74.143, wrapped in a clone limited to one instance in total
     (clone_max=1) and one per node (clone_node_max=1). -->
<clone id="Fence-node1">
  <instance_attributes id="bd59039b-9735-4f99-a8a8-7852cb8a44d8">
    <attributes>
      <nvpair id="01a28dfc-880b-4e0d-ac46-d9beeb2f193a" name="clone_max" value="1"/>
      <nvpair id="e7c565d9-bc96-40ff-9473-967ae3cfd35c" name="clone_node_max" value="1"/>
    </attributes>
  </instance_attributes>
  <primitive id="child_Fence-node1" class="stonith" type="external/riloe">
    <operations>
      <!-- NOTE(review): a 5s monitor interval polls the iLO board very often;
           confirm the device tolerates this rate. -->
      <op id="ef5aefca-b1bd-4bcb-bd7a-03171267749e"
          name="monitor"
          interval="5s"
          timeout="20s"
          prereq="nothing"/>
      <op id="3f762aae-93f6-4086-b6ef-bf2264aa61ab"
          name="start"
          timeout="20s"
          prereq="nothing"/>
    </operations>
    <instance_attributes id="f53d6dff-5ae1-4424-a110-c23824c0a881">
      <attributes>
        <nvpair id="bc952934-2ab7-4ec3-b119-38e2522bde7d" name="hostlist" value="node1"/>
        <nvpair id="fe255ff7-e131-4c55-91bc-f148a58df80a" name="ilo_hostname" value="192.168.74.143"/>
        <nvpair id="98dc8ddc-7b22-44f5-9ae2-215bbdb3549f" name="ilo_user" value="Admin"/>
        <nvpair id="bf1a0945-d42e-47fd-9716-fdee1ecfd691" name="ilo_password" value="XXXXX"/>
        <nvpair id="ee889b34-36a0-4aa2-ab36-0baa2ad0abd3" name="ilo_protocol" value="2.0"/>
        <nvpair id="1ded55c9-2c6c-4b99-b467-063db8f2fa0f" name="ilo_can_reset" value="1"/>
        <nvpair id="519a1681-7cae-4c7f-9a3b-78210a6ab677" name="ilo_powerdown_method" value="button"/>
      </attributes>
    </instance_attributes>
  </primitive>
</clone>
<!-- STONITH device for node2: an external/riloe primitive pointed at the iLO
     at 192.168.74.144, wrapped in a clone limited to one instance in total
     (clone_max=1) and one per node (clone_node_max=1). -->
<clone id="Fence-node2">
  <instance_attributes id="08b6a6d3-48d3-4019-9b54-c769526e7bce">
    <attributes>
      <nvpair id="30d6ec40-8287-406a-be17-111a720bb4be" name="clone_max" value="1"/>
      <nvpair id="e73b4f08-6c8a-4475-9248-3425068d7232" name="clone_node_max" value="1"/>
    </attributes>
  </instance_attributes>
  <primitive id="child_Fence-node2" class="stonith" type="external/riloe">
    <operations>
      <!-- NOTE(review): a 5s monitor interval polls the iLO board very often;
           confirm the device tolerates this rate. -->
      <op id="e6ba01df-1ad5-4e7c-b8ee-f99617d06407"
          name="monitor"
          interval="5s"
          timeout="20s"
          prereq="nothing"/>
      <op id="2a838392-319a-48ab-a349-44f5ea76afac"
          name="start"
          timeout="20s"
          prereq="nothing"/>
    </operations>
    <instance_attributes id="e89057b2-f593-4706-b34f-05f60ded251e">
      <attributes>
        <nvpair id="447ff7f1-9591-4306-ac56-47b4ae2a84a4" name="hostlist" value="node2"/>
        <nvpair id="15820dce-ed48-4863-8df0-e46b837d8221" name="ilo_hostname" value="192.168.74.144"/>
        <nvpair id="dc099c21-9004-4ffc-80b7-2e735e1b11c5" name="ilo_user" value="Admin"/>
        <nvpair id="32b60bcf-89db-4d7d-bbfb-4fde631a58a4" name="ilo_password" value="XXXXX"/>
        <nvpair id="cb2e261d-9a6d-42c5-bf62-6fe88726c412" name="ilo_protocol" value="2.0"/>
        <nvpair id="550c01aa-417b-47d9-bac7-1ec9959cbca5" name="ilo_can_reset" value="1"/>
        <nvpair id="c1c9d6eb-c098-4b87-a725-64a58d4f7a67" name="ilo_powerdown_method" value="button"/>
      </attributes>
    </instance_attributes>
  </primitive>
</clone>
</resources>
<!-- Placement and ordering rules.
     Both application groups are colocated with group_basis at score INFINITY
     and start after it. Each fencing clone is excluded (-INFINITY) from the
     node it fences and preferred (1000) on the other node. -->
<constraints>
  <!-- group_soft1 follows group_basis -->
  <rsc_colocation id="soft1_on_basis" from="group_soft1" to="group_basis" score="INFINITY"/>
  <rsc_order id="basis_before_soft1"
             from="group_soft1"
             action="start"
             type="after"
             to="group_basis"
             to_action="start"/>

  <!-- group_soft2 follows group_basis -->
  <rsc_colocation id="soft2_on_basis" from="group_soft2" to="group_basis" score="INFINITY"/>
  <rsc_order id="basis_before_soft2"
             from="group_soft2"
             action="start"
             type="after"
             to="group_basis"
             to_action="start"/>

  <!-- Fence-node1: never on node1, preferred on node2 -->
  <rsc_location id="stonith-constraint-node1:1" rsc="Fence-node1">
    <rule id="stonith-constraint-rule-node1:1" score="-INFINITY">
      <expression id="stonith-constraint-rule-expression-node1:1"
                  attribute="#uname"
                  operation="eq"
                  value="node1"/>
    </rule>
  </rsc_location>
  <rsc_location id="stonith-constraint-node1:2" rsc="Fence-node1">
    <rule id="stonith-constraint-rule-node1:2" score="1000">
      <expression id="stonith-constraint-rule-expression-node1:2"
                  attribute="#uname"
                  operation="eq"
                  value="node2"/>
    </rule>
  </rsc_location>

  <!-- Fence-node2: preferred on node1, never on node2 -->
  <rsc_location id="stonith-constraint-node2:1" rsc="Fence-node2">
    <rule id="stonith-constraint-rule-node2:1" score="1000">
      <expression id="stonith-constraint-rule-expression-node2:1"
                  attribute="#uname"
                  operation="eq"
                  value="node1"/>
    </rule>
  </rsc_location>
  <rsc_location id="stonith-constraint-node2:2" rsc="Fence-node2">
    <rule id="stonith-constraint-rule-node2:2" score="-INFINITY">
      <expression id="stonith-constraint-rule-expression-node2:2"
                  attribute="#uname"
                  operation="eq"
                  value="node2"/>
    </rule>
  </rsc_location>
</constraints>
</configuration>
<!-- Runtime status snapshot. It contains a single node_state entry, for node1;
     there is no entry for node2 in this dump. -->
<status>
<node_state id="9cf297ca-957a-4c80-abd0-41bbc60efbee" uname="node1" crmd="online" crm-debug-origin="do_update_resource" shutdown="0" in_ccm="true" ha="active" join="member" expected="member">
<lrm id="9cf297ca-957a-4c80-abd0-41bbc60efbee">
<!-- Every resource below has an initial monitor_0 result with rc_code="7"
     (presumably "not running"; confirm against the agent return codes).
     The only other recorded operation is the start of child_Fence-node2:0. -->
<lrm_resources>
<lrm_resource id="drbddisk_resource" type="drbddisk" class="heartbeat" provider="heartbeat">
<lrm_rsc_op id="drbddisk_resource_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" transition_key="4:0:970b2323-2594-4eee-aabb-248e4f564759" transition_magic="0:7;4:0:970b2323-2594-4eee-aabb-248e4f564759" call_id="2" crm_feature_set="2.0" rc_code="7" op_status="0" interval="0" op_digest="eaef18223bea84e8b011465368d9f74e"/>
</lrm_resource>
<lrm_resource id="IPaddr_192_168_80_150" type="IPaddr" class="ocf" provider="heartbeat">
<lrm_rsc_op id="IPaddr_192_168_80_150_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" transition_key="6:0:970b2323-2594-4eee-aabb-248e4f564759" transition_magic="0:7;6:0:970b2323-2594-4eee-aabb-248e4f564759" call_id="4" crm_feature_set="2.0" rc_code="7" op_status="0" interval="0" op_digest="c541f53eb82a526c7784ef8200486ecb"/>
</lrm_resource>
<lrm_resource id="IPaddr_192_168_74_150" type="IPaddr" class="ocf" provider="heartbeat">
<lrm_rsc_op id="IPaddr_192_168_74_150_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" transition_key="7:0:970b2323-2594-4eee-aabb-248e4f564759" transition_magic="0:7;7:0:970b2323-2594-4eee-aabb-248e4f564759" call_id="5" crm_feature_set="2.0" rc_code="7" op_status="0" interval="0" op_digest="69b2aefa4287727f48e6a43e638b662c"/>
</lrm_resource>
<lrm_resource id="filesystem_resource" type="Filesystem" class="ocf" provider="heartbeat">
<lrm_rsc_op id="filesystem_resource_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" transition_key="5:0:970b2323-2594-4eee-aabb-248e4f564759" transition_magic="0:7;5:0:970b2323-2594-4eee-aabb-248e4f564759" call_id="3" crm_feature_set="2.0" rc_code="7" op_status="0" interval="0" op_digest="8f4d683eb05aa05ca5a40898e90b3c27"/>
</lrm_resource>
<lrm_resource id="httpd2_resource" type="apache" class="ocf" provider="heartbeat">
<lrm_rsc_op id="httpd2_resource_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" transition_key="10:0:970b2323-2594-4eee-aabb-248e4f564759" transition_magic="0:7;10:0:970b2323-2594-4eee-aabb-248e4f564759" call_id="8" crm_feature_set="2.0" rc_code="7" op_status="0" interval="0" op_digest="cc3ddc9b1721229bb7c3469d936ed308"/>
</lrm_resource>
<lrm_resource id="soft2-1_resource" type="soft2-1" class="lsb" provider="heartbeat">
<lrm_rsc_op id="soft2-1_resource_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" transition_key="11:0:970b2323-2594-4eee-aabb-248e4f564759" transition_magic="0:7;11:0:970b2323-2594-4eee-aabb-248e4f564759" call_id="9" crm_feature_set="2.0" rc_code="7" op_status="0" interval="0" op_digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
</lrm_resource>
<lrm_resource id="tomcat-1_resource" type="tomcat-1" class="lsb" provider="heartbeat">
<lrm_rsc_op id="tomcat-1_resource_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" transition_key="9:0:970b2323-2594-4eee-aabb-248e4f564759" transition_magic="0:7;9:0:970b2323-2594-4eee-aabb-248e4f564759" call_id="7" crm_feature_set="2.0" rc_code="7" op_status="0" interval="0" op_digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
</lrm_resource>
<lrm_resource id="mysql_resource" type="mysql" class="lsb" provider="heartbeat">
<lrm_rsc_op id="mysql_resource_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" transition_key="8:0:970b2323-2594-4eee-aabb-248e4f564759" transition_magic="0:7;8:0:970b2323-2594-4eee-aabb-248e4f564759" call_id="6" crm_feature_set="2.0" rc_code="7" op_status="0" interval="0" op_digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
</lrm_resource>
<lrm_resource id="child_Fence-node1:0" type="external/riloe" class="stonith">
<lrm_rsc_op id="child_Fence-node1:0_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" transition_key="12:0:970b2323-2594-4eee-aabb-248e4f564759" transition_magic="0:7;12:0:970b2323-2594-4eee-aabb-248e4f564759" call_id="10" crm_feature_set="2.0" rc_code="7" op_status="0" interval="0" op_digest="7af449239539b94ffa9ee77d1187757a"/>
</lrm_resource>
<!-- The start of the node2 fencing device on node1 is recorded with rc_code="1",
     i.e. a non-zero result, matching the fail-count entry further down. -->
<lrm_resource id="child_Fence-node2:0" type="external/riloe" class="stonith">
<lrm_rsc_op id="child_Fence-node2:0_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" transition_key="13:0:970b2323-2594-4eee-aabb-248e4f564759" transition_magic="0:7;13:0:970b2323-2594-4eee-aabb-248e4f564759" call_id="11" crm_feature_set="2.0" rc_code="7" op_status="0" interval="0" op_digest="ced891a1a35f6891cf5c3a45f00b9dda"/>
<lrm_rsc_op id="child_Fence-node2:0_start_0" operation="start" crm-debug-origin="do_update_resource" transition_key="46:0:970b2323-2594-4eee-aabb-248e4f564759" transition_magic="0:1;46:0:970b2323-2594-4eee-aabb-248e4f564759" call_id="12" crm_feature_set="2.0" rc_code="1" op_status="0" interval="0" op_digest="ced891a1a35f6891cf5c3a45f00b9dda"/>
</lrm_resource>
</lrm_resources>
</lrm>
<!-- Transient node attributes for node1: probing is complete, and the
     fail-count for child_Fence-node2:0 is INFINITY. -->
<transient_attributes id="9cf297ca-957a-4c80-abd0-41bbc60efbee">
<instance_attributes id="status-9cf297ca-957a-4c80-abd0-41bbc60efbee">
<attributes>
<nvpair id="status-9cf297ca-957a-4c80-abd0-41bbc60efbee-probe_complete" name="probe_complete" value="true"/>
<nvpair id="status-9cf297ca-957a-4c80-abd0-41bbc60efbee-fail-count-child_Fence-node2:0" name="fail-count-child_Fence-node2:0" value="INFINITY"/>
</attributes>
</instance_attributes>
</transient_attributes>
</node_state>
</status>
</cib>
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems