Hello again,

After fixing up my VirtualIP problem, I have been doing some split brain tests,
and while everything 'returns to normal', it is not quite what I had desired.

My scenario:
Active/Passive two-node cluster (serverA & serverB) with Corosync, DRBD & PGSQL.
The resources are configured as Master/Slave, and so far that part is fine.

Since bullet points speak louder than words: ;)
Test:
 1) Pull the plug on the master (serverA)
 2) Then reattach it
Expected results:
 1) serverB becomes Master
 2) serverB remains Master, serverA syncs with serverB
Actual results:
 1) serverB becomes Master
 2) serverA becomes Master again, and the data written on serverB is lost
    (see the recovery commands below)
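
For reference, when DRBD ends up split brained and disconnected instead of
resolving on its own, the manual recovery I know of is the usual procedure
from the DRBD user guide, roughly (drbd0 is my resource name):

  # on the node whose data should be thrown away (serverA in my test):
  drbdadm secondary drbd0
  drbdadm -- --discard-my-data connect drbd0

  # on the surviving node (serverB), if its connection is StandAlone:
  drbdadm connect drbd0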

In all honesty, I am not an expert in HA, DRBD, or Corosync. I know the basics,
but it is not my area of expertise.
Most of my configs have been influenced... ok, blatantly copied from the net
and tweaked until they worked.
Yet now I am at a loss.

Am I presuming something that is not possible with Corosync (which I doubt), or
is my config wrong (probably)?
Either way, I am unable to find any smoking gun.

I have visited all the sites that might hold the information, but none of them
really pinpoints the problem.
The only difference I could tell was that some examples did not have the split
brain handling (the after-sb-* policies) in their drbd.conf.
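
As an aside, a fencing-based alternative to those policies exists in DRBD
itself; I have not tried it, and the sketch below is untested (handler paths
as shipped with the DRBD 8.3 packages; it would go into the drbd0 resource
section):

  disk {
    fencing resource-only;
  }
  handlers {
    # on loss of the peer, place a constraint in the CIB via Pacemaker
    fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
    # remove that constraint again once resync has finished
    after-resync-target "/usr/lib/drbd/crm-unfence-peer.sh";
  }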

Can someone point me in the right direction?

Thanks!

Frank

Here are the obligatory config file contents:

############### /etc/drbd.conf 

global {
  usage-count no;
}
common {
  syncer {
    rate 100M;
  }
  protocol C;
}
resource drbd0 {

  startup {
    wfc-timeout 20;
    degr-wfc-timeout 10;
  }
  disk {
    on-io-error detach;
  }
  net {
    cram-hmac-alg sha1;
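    # automatic split brain recovery policies:
    #   0 primaries: keep one side's changes if only one side has any
    #   1 primary:   throw away the current secondary's changes
    #   2 primaries: no automatic recovery, just disconnect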
    after-sb-0pri discard-zero-changes;
    after-sb-1pri discard-secondary;
    after-sb-2pri disconnect; 

  }
  on serverA {
    device /dev/drbd0;
    disk /dev/sda5;
    meta-disk internal;
    address 150.158.183.22:7788;
  }
  on serverB {
    device /dev/drbd0;
    disk /dev/sda5;
    meta-disk internal;
    address 150.158.183.23:7788;
  }
}

############### /etc/ha.d/ha.cf 

udpport 694
ucast eth0 150.158.183.23

autojoin none
debug 1
logfile /var/log/ha-log
use_logd false
logfacility daemon
keepalive 2 # 2 second(s)
deadtime 10
# warntime 10
initdead 80

# list all shared ip addresses we want to ping
ping 150.158.183.30
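# (note: this is also the cluster's virtual IP from the CIB below)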

# list all node names
node serverB serverA
crm yes
respawn root /usr/lib/heartbeat/pingd -m 100 -d 5s

############### /etc/corosync/corosync.conf

totem {
        version: 2
        token: 1000
        hold: 180
        token_retransmits_before_loss_const: 20
        join: 60
        # how long to wait for consensus to be achieved before starting a
        # new round of membership configuration (ms)
        consensus: 4800
        vsftype: none
        max_messages: 20
        clear_node_high_bit: yes
        secauth: off
        threads: 0
        rrp_mode: none
        interface {
                ringnumber: 0
                bindnetaddr: 150.158.183.0
                mcastaddr: 226.94.1.22
                mcastport: 5427
        }
}
amf {
        mode: disabled
}
service {
        ver: 0
        name: pacemaker
}
aisexec {
        user: root
        group: root
}
logging {
        fileline: off
        to_stderr: yes
        to_logfile: yes
        to_syslog: yes
        logfile: /var/log/corosync/corosync.log
        syslog_facility: daemon
        debug: off
        timestamp: on
        logger_subsys {
                subsys: AMF
                debug: off
                tags: enter|leave|trace1|trace2|trace3|trace4|trace6
        }
}

############### /var/lib/heartbeat/crm/cib.xml

<cib have_quorum="true" generated="true" ignore_dtd="false" epoch="14" 
num_updates="0" admin_epoch="0" validate-with="transitional-0.6" 
cib-last-written="Wed Feb  9 14:03:30 2011" crm_feature_set="3.0.1" 
have-quorum="0" dc-uuid="serverA">
  <configuration>
    <crm_config>
      <cluster_property_set id="cib-bootstrap-options">
        <attributes>
          <nvpair id="option_1" name="symmetric_cluster" value="true"/>
          <nvpair id="option_2" name="no_quorum_policy" value="ignore"/>
          <nvpair id="option_3" name="stonith_enabled" value="false"/>
          <nvpair id="option_9" name="default-resource-stickiness" 
value="1000"/>
          <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" 
value="1.0.9-74392a28b7f31d7ddc86689598bd23114f58978b"/>
          <nvpair id="cib-bootstrap-options-cluster-infrastructure" 
name="cluster-infrastructure" value="openais"/>
          <nvpair id="cib-bootstrap-options-expected-quorum-votes" 
name="expected-quorum-votes" value="2"/>
        </attributes>
      </cluster_property_set>
    </crm_config>
    <nodes>
      <node id="serverA" uname="serverA" type="normal"/>
      <node id="serverB" uname="serverB" type="normal"/>
    </nodes>
    <resources>
      <master_slave id="ms_drbd0">
        <meta_attributes id="ma-ms_drbd0">
          <attributes>
            <nvpair id="ma-ms-drbd0-1" name="clone_max" value="2"/>
            <nvpair id="ma-ms-drbd0-2" name="clone_node_max" value="1"/>
            <nvpair id="ma-ms-drbd0-3" name="master_max" value="1"/>
            <nvpair id="ma-ms-drbd0-4" name="master_node_max" value="1"/>
            <nvpair id="ma-ms-drbd0-5" name="notify" value="yes"/>
            <nvpair id="ma-ms-drbd0-6" name="globally_unique" value="false"/>
            <nvpair id="ma-ms-drbd0-7" name="target_role" value="started"/>
          </attributes>
        </meta_attributes>
        <primitive class="ocf" type="drbd" provider="heartbeat" 
id="drbddisk_rep">
          <instance_attributes id="drbddisk_rep_ias">
            <attributes>
              <nvpair id="drbd_primary_ia_failover_1" name="drbd_resource" 
value="drbd0"/>
              <nvpair id="drbd_primary_ia_failover_2" name="target_role" 
value="started"/>
              <nvpair id="drbd_primary_ia_failover_3" name="ignore_deprecation" 
value="true"/>
            </attributes>
          </instance_attributes>
          <operations>
            <op id="ms_drbd_mysql-monitor-master" name="monitor" interval="29s" 
timeout="10s" role="Master"/>
            <op id="ms_drbd_mysql-monitor-slave" name="monitor" interval="30s" 
timeout="10s" role="Slave"/>
          </operations>
        </primitive>
      </master_slave>
      <group id="rg_drbd" ordered="true">
        <meta_attributes id="ma-apache">
          <attributes>
            <nvpair id="ia-at-fs0" name="target_role" value="started"/>
          </attributes>
        </meta_attributes>
        <primitive id="ip_resource" class="ocf" type="IPaddr2" 
provider="heartbeat">
          <instance_attributes id="virtual-ip-attribs">
            <attributes>
              <nvpair id="virtual-ip-addr" name="ip" value="150.158.183.30"/>
              <nvpair id="virtual-ip-addr-nic" name="nic" value="eth0"/>
              <nvpair id="virtual-ip-addr-netmask" name="cidr_netmask" 
value="22"/>
              <nvpair id="virtual-ip-addr-iflabel" name="iflabel" value="0"/>
            </attributes>
          </instance_attributes>
          <operations>
            <op id="virtual-ip-monitor-10s" interval="10s" name="monitor"/>
          </operations>
        </primitive>
        <primitive class="ocf" provider="heartbeat" type="Filesystem" id="fs0">
          <instance_attributes id="ia-fs0">
            <attributes>
              <nvpair id="ia-fs0-1" name="fstype" value="ext3"/>
              <nvpair id="ia-fs0-2" name="directory" value="/mnt/rep"/>
              <nvpair id="ia-fs0-3" name="device" value="/dev/drbd0"/>
              <nvpair id="ia-fs0-4" name="options" 
value="noatime,nodiratime,barrier=0"/>
            </attributes>
          </instance_attributes>
        </primitive>
        <primitive id="pgsql" class="ocf" type="pgsql" provider="heartbeat">
          <instance_attributes id="pgsql-instance_attributes">
            <attributes>
              <nvpair id="pgsql-instance_attributes-pgdata" name="pgdata" 
value="/mnt/rep/pgsql/data"/>
              <nvpair id="pgsql-instance_attributes-pgctl" name="pgctl" 
value="/usr/lib/postgresql/8.3/bin/pg_ctl"/>
              <nvpair id="pgsql-instance_attributes-pgport" name="pgport" 
value="5432"/>
            </attributes>
          </instance_attributes>
          <operations>
            <op id="psql-monitor-30s" timeout="30s" interval="30s" 
name="monitor"/>
          </operations>
        </primitive>
      </group>
    </resources>
    <constraints>
      <rsc_location id="drbd0-placement-1" rsc="ms_drbd0">
        <rule id="drbd0-rule-1" score="-INFINITY">
          <expression id="exp-01" value="serverA" attribute="#uname" 
operation="ne"/>
          <expression id="exp-02" value="serverB" attribute="#uname" 
operation="ne"/>
        </rule>
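        <!-- this rule prefers serverA for the Master role with a score of 100 -->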
        <rule id="drbd0-master-on-1" role="master" score="100">
          <expression id="exp-1" attribute="#uname" operation="eq" 
value="serverA"/>
        </rule>
      </rsc_location>
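      <!-- the group starts only after ms_drbd0 is promoted, colocated with the Master -->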
      <rsc_order id="mount_after_drbd" from="rg_drbd" action="start" 
to="ms_drbd0" to_action="promote"/>
      <rsc_colocation id="mount_on_drbd" to="ms_drbd0" to_role="master" 
from="rg_drbd" score="INFINITY"/>
    </constraints>
  </configuration>
</cib>

