Re: [Openais] [PATCH 1/2] CTS: increase the token timeout on some tests
good for merge regards -steve On Tue, 2010-04-20 at 13:09 +1000, Angus Salkeld wrote: > This increases the token timeout on some tests to > test that config events still happen. > > -Angus > > Signed-off-by: Angus Salkeld > --- > cts/corotests.py | 12 ++-- > 1 files changed, 6 insertions(+), 6 deletions(-) > > diff --git a/cts/corotests.py b/cts/corotests.py > index a91e0bd..eaad43f 100644 > --- a/cts/corotests.py > +++ b/cts/corotests.py > @@ -131,7 +131,7 @@ class CpgConfigChangeBase(CoroTest): > > def wait_for_config_change(self): > found = False > -max_timeout = 5 * 60 > +max_timeout = 30 * 60 > waited = 0 > printit = 0 > self.CM.log("Waiting for config change on " + self.listener) > @@ -150,7 +150,7 @@ class CpgConfigChangeBase(CoroTest): > waited = waited + 1 > printit = printit + 1 > if printit is 60: > -print 'waited 60 seconds' > +print 'waited ' + str(waited) + ' seconds' > printit = 0 > > elif str(event.node_id) in str(self.wobbly_id) and not > event.is_member: > @@ -1012,14 +1012,14 @@ def CoroTestList(cm, audits): > default['uidgid/gid'] = '0' > configs.append(default) > > -a = ConfigContainer('none_1') > +a = ConfigContainer('none_5min') > a['compatibility'] = 'none' > -a['totem/token'] = 1 > +a['totem/token'] = (4 * 60 * 1000) > configs.append(a) > > -b = ConfigContainer('whitetank_1') > +b = ConfigContainer('whitetank_5min') > b['compatibility'] = 'whitetank' > -b['totem/token'] = 1 > +b['totem/token'] = (4 * 60 * 1000) > configs.append(b) > > c = ConfigContainer('sec_nss') ___ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais
Re: [Openais] [PATCH 2/2] CTS: add CpgCfgChgOnNodeRestart
good for merge regards -steve On Tue, 2010-04-20 at 13:10 +1000, Angus Salkeld wrote: > Add a new test that will > 1 isolate a node > 2 kill corosync > 3 restart it > 4 confirm the cpg left config event happens > > -Angus > > Signed-off-by: Angus Salkeld > --- > cts/corotests.py | 31 +++ > 1 files changed, 31 insertions(+), 0 deletions(-) > > diff --git a/cts/corotests.py b/cts/corotests.py > index eaad43f..8aa754a 100644 > --- a/cts/corotests.py > +++ b/cts/corotests.py > @@ -325,6 +325,36 @@ class CpgCfgChgOnNodeIsolate(CpgConfigChangeBase): > return CpgConfigChangeBase.teardown(self, node) > > ### > +class CpgCfgChgOnNodeRestart(CpgConfigChangeBase): > + > +def __init__(self, cm): > +CpgConfigChangeBase.__init__(self,cm) > +self.name="CpgCfgChgOnNodeRestart" > + > +def config_valid(self, config): > +if config.has_key('totem/rrp_mode'): > +return False > +else: > +return True > + > +def failure_action(self): > +self.CM.log("isolating node " + self.wobbly) > +self.CM.isolate_node(self.wobbly) > +self.CM.log("Restarting corosync on " + self.wobbly) > +self.CM.rsh(self.wobbly, "killall -9 corosync") > +self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid") > +self.CM.StartaCM(self.wobbly) > + > +def __call__(self, node): > +self.incr("calls") > +self.failure_action() > +return self.wait_for_config_change() > + > +def teardown(self, node): > +self.CM.unisolate_node (self.wobbly) > +return CpgConfigChangeBase.teardown(self, node) > + > +### > class CpgMsgOrderBase(CoroTest): > > def __init__(self, cm): > @@ -961,6 +991,7 @@ GenTestClasses.append(CpgCfgChgOnExecCrash) > GenTestClasses.append(CpgCfgChgOnGroupLeave) > GenTestClasses.append(CpgCfgChgOnNodeLeave) > GenTestClasses.append(CpgCfgChgOnNodeIsolate) > +GenTestClasses.append(CpgCfgChgOnNodeRestart) > GenTestClasses.append(CpgCfgChgOnLowestNodeJoin) > GenTestClasses.append(VoteQuorumGoDown) > GenTestClasses.append(VoteQuorumGoUp) ___ Openais mailing list Openais@lists.linux-foundation.org 
https://lists.linux-foundation.org/mailman/listinfo/openais
[Openais] [PATCH 2/2] CTS: add CpgCfgChgOnNodeRestart
Add a new test that will 1 isolate a node 2 kill corosync 3 restart it 4 confirm the cpg left config event happens -Angus Signed-off-by: Angus Salkeld --- cts/corotests.py | 31 +++ 1 files changed, 31 insertions(+), 0 deletions(-) diff --git a/cts/corotests.py b/cts/corotests.py index eaad43f..8aa754a 100644 --- a/cts/corotests.py +++ b/cts/corotests.py @@ -325,6 +325,36 @@ class CpgCfgChgOnNodeIsolate(CpgConfigChangeBase): return CpgConfigChangeBase.teardown(self, node) ### +class CpgCfgChgOnNodeRestart(CpgConfigChangeBase): + +def __init__(self, cm): +CpgConfigChangeBase.__init__(self,cm) +self.name="CpgCfgChgOnNodeRestart" + +def config_valid(self, config): +if config.has_key('totem/rrp_mode'): +return False +else: +return True + +def failure_action(self): +self.CM.log("isolating node " + self.wobbly) +self.CM.isolate_node(self.wobbly) +self.CM.log("Restarting corosync on " + self.wobbly) +self.CM.rsh(self.wobbly, "killall -9 corosync") +self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid") +self.CM.StartaCM(self.wobbly) + +def __call__(self, node): +self.incr("calls") +self.failure_action() +return self.wait_for_config_change() + +def teardown(self, node): +self.CM.unisolate_node (self.wobbly) +return CpgConfigChangeBase.teardown(self, node) + +### class CpgMsgOrderBase(CoroTest): def __init__(self, cm): @@ -961,6 +991,7 @@ GenTestClasses.append(CpgCfgChgOnExecCrash) GenTestClasses.append(CpgCfgChgOnGroupLeave) GenTestClasses.append(CpgCfgChgOnNodeLeave) GenTestClasses.append(CpgCfgChgOnNodeIsolate) +GenTestClasses.append(CpgCfgChgOnNodeRestart) GenTestClasses.append(CpgCfgChgOnLowestNodeJoin) GenTestClasses.append(VoteQuorumGoDown) GenTestClasses.append(VoteQuorumGoUp) -- 1.6.6.1 ___ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais
[Openais] [PATCH 1/2] CTS: increase the token timeout on some tests
This increases the token timeout on some tests to test that config events still happen. -Angus Signed-off-by: Angus Salkeld --- cts/corotests.py | 12 ++-- 1 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cts/corotests.py b/cts/corotests.py index a91e0bd..eaad43f 100644 --- a/cts/corotests.py +++ b/cts/corotests.py @@ -131,7 +131,7 @@ class CpgConfigChangeBase(CoroTest): def wait_for_config_change(self): found = False -max_timeout = 5 * 60 +max_timeout = 30 * 60 waited = 0 printit = 0 self.CM.log("Waiting for config change on " + self.listener) @@ -150,7 +150,7 @@ class CpgConfigChangeBase(CoroTest): waited = waited + 1 printit = printit + 1 if printit is 60: -print 'waited 60 seconds' +print 'waited ' + str(waited) + ' seconds' printit = 0 elif str(event.node_id) in str(self.wobbly_id) and not event.is_member: @@ -1012,14 +1012,14 @@ def CoroTestList(cm, audits): default['uidgid/gid'] = '0' configs.append(default) -a = ConfigContainer('none_1') +a = ConfigContainer('none_5min') a['compatibility'] = 'none' -a['totem/token'] = 1 +a['totem/token'] = (4 * 60 * 1000) configs.append(a) -b = ConfigContainer('whitetank_1') +b = ConfigContainer('whitetank_5min') b['compatibility'] = 'whitetank' -b['totem/token'] = 1 +b['totem/token'] = (4 * 60 * 1000) configs.append(b) c = ConfigContainer('sec_nss') -- 1.6.6.1 ___ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais
[Openais] [PATCH 2/2] Add autobuild.sh script.
This add a script for buildbot to call. It uses mock to generate the rpms and scp's them onto the test nodes, then runs cts. Signed-off-by: Angus Salkeld --- autobuild.sh | 97 ++ 1 files changed, 97 insertions(+), 0 deletions(-) create mode 100755 autobuild.sh diff --git a/autobuild.sh b/autobuild.sh new file mode 100755 index 000..21c3b61 --- /dev/null +++ b/autobuild.sh @@ -0,0 +1,97 @@ +#!/bin/sh +# +# This script is called by auto-build to test +# corosync. It is run continously to help catch regressions. +# +# ENVIRONMENT variables that affect it's behaviour: +# +# TEST_NODES - the hostnames of the nodes to be tested +# TARGET - this is used by mock so look in /etc/mock for +# possible options. +# + +# required packages +which mock >/dev/null 2>&1 +if [ $? -ne 0 ] +then + echo 'please install mock (yum install mock).' +exit 1 +fi + +MOCK=$(which mock) + +set -e + +echo 'running autogen ...' +./autogen.sh + +echo 'running configure ...' +./configure + +echo 'building source rpm' +rm -f *.src.rpm +make srpm +SRPM=$(ls *src.rpm) + +if [ ! -f $SRPM ] +then + echo $0 no source rpm to build from! + exit 1 +fi + +if [ -z "$TARGET" ] +then + TARGET=fedora-12-x86_64 +fi + +RPM_DIR=/var/lib/mock/$TARGET/result +rm -f $RPM_DIR/corosync*.rpm + +echo "running mock init ($TARGET)" +$MOCK -r $TARGET --init +echo "running mock rebuild ($SRPM)" +$MOCK -r $TARGET --rebuild $SRPM --with testagents + +if [ -z "$TEST_NODES" ] +then + echo no test nodes, exiting without running cts. + exit 0 +else + # start the VMs, or leave them running? + true +fi + +RPM_LIST= +for r in $RPM_DIR/corosync*.rpm +do + case $r in +*src.rpm) +;; +*-devel-*) +;; +#*debuginfo*) +#;; +*) +RPM_LIST="$RPM_LIST $r" +;; + esac +done + + +echo installing $RPM_LIST +echo onto the test nodes $TEST_NODES + +# load and install rpm(s) onto the nodes +for n in $TEST_NODES +do + ssh $n "rm -rf /tmp/corosync*.rpm" + scp $RPM_LIST $n:/tmp/ +ssh $n "rpm --force -Uvf /tmp/corosync*.rpm" +done + +echo 'running test ...' 
+pushd cts +# needs sudo to read /var/log/messages +sudo -n ./corolab.py --nodes "$TEST_NODES" +popd + -- 1.6.6.1 ___ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais
[Openais] [PATCH 1/2] Fix mock --with testagents
Hi This makes "make dist" with --enable-testagents work. -Angus Signed-off-by: Angus Salkeld --- corosync.spec.in |4 cts/agents/Makefile.am | 10 -- lib/Makefile.am|8 +--- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/corosync.spec.in b/corosync.spec.in index 97bcb2e..a05840a 100644 --- a/corosync.spec.in +++ b/corosync.spec.in @@ -135,6 +135,10 @@ This package contains corosync test agents. %{_datadir}/corosync/tests/mem_leak_test.sh %{_datadir}/corosync/tests/net_breaker.sh %{_bindir}/cpg_test_agent +%{_bindir}/confdb_test_agent +%{_bindir}/sam_test_agent +%{_bindir}/votequorum_test_agent +%{_libexecdir}/lcrso/service_syncv2.lcrso %endif diff --git a/cts/agents/Makefile.am b/cts/agents/Makefile.am index 9a1b31b..4d4c862 100644 --- a/cts/agents/Makefile.am +++ b/cts/agents/Makefile.am @@ -30,11 +30,14 @@ # THE POSSIBILITY OF SUCH DAMAGE. MAINTAINERCLEANFILES = Makefile.in -INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include +INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ + -I$(top_builddir)/include/corosync SOURCES = TEST_AGENTS = cpg_test_agent confdb_test_agent sam_test_agent votequorum_test_agent +EXTRA_DIST = syncv2.c + if INSTALL_TESTAGENTS agentdir = $(datadir)/$(PACKAGE)/tests bin_PROGRAMS = $(TEST_AGENTS) @@ -43,13 +46,13 @@ dist_agent_SCRIPTS = mem_leak_test.sh net_breaker.sh AM_CFLAGS = -fPIC SERVICE_LCRSO = syncv2 SOURCES+= $(SERVICE_LCRSO:%=%.c) -EXTRA_DIST = $(SOURCES) LCRSO = $(SERVICE_LCRSO:%=service_%.lcrso) LCRSO_OBJS = $(SOURCES:%.c=%.o) else noinst_PROGRAMS = $(TEST_AGENTS) noinst_SCRIPTS = mem_leak_test.sh net_breaker.sh LCRSO = +LCRSO_OBJS = endif noinst_HEADERS = common_test_agent.h @@ -91,6 +94,9 @@ endif if INSTALL_TESTAGENTS +syncv2.o: syncv2.c + $(CC) $(AM_CFLAGS) $(CFLAGS) $(CPPFLAGS) $(INCLUDES) -c -o $@ $< + all-local: $(LCRSO_OBJS) $(LCRSO) @echo Built Service Engines diff --git a/lib/Makefile.am b/lib/Makefile.am index 7e4974e..2a16b3c 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ 
-107,12 +107,14 @@ libcoroipcc.so.$(SONAME): coroipcc.o ln -sf libcoroipcc.so.$(SONAME) libcoroipcc.so.$(SOMAJOR) lib%.so: lib%.a libcoroipcc.so.$(SONAME) + if [ ! "$@" = "libcoroipcc.so" ] ; then \ $(CC) -shared -o $@.$(call get_soname,$*) \ -Wl,-soname=lib$*.so.$(call get_major,$*) \ -Wl,-version-script=$(srcdir)/lib$*.versions \ - -Wl,-whole-archive $^ -Wl,-no-whole-archive $(LDFLAGS) $(AM_LDFLAGS) - ln -sf lib$*.so.$(call get_soname,$*) lib$*.so - ln -sf lib$*.so.$(call get_soname,$*) lib$*.so.$(call get_major,$*) + -Wl,-whole-archive $^ -Wl,-no-whole-archive $(LDFLAGS) $(AM_LDFLAGS) ;\ + ln -sf lib$*.so.$(call get_soname,$*) lib$*.so ;\ + ln -sf lib$*.so.$(call get_soname,$*) lib$*.so.$(call get_major,$*) ;\ + fi endif -- 1.6.6.1 ___ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais
Re: [Openais] corosync - CPG model_init + callback with totem ringid and members
On Thu, Apr 08, 2010 at 04:57:22PM +0200, Jan Friesse wrote: > commit 0d509f4bf23f618c940c3bcdd7cf0e97faf64876 > Author: Jan Friesse > Date: Thu Apr 8 16:48:45 2010 +0200 > > CPG model_initialize and ringid + members callback > > Patch adds new function to initialize cpg, cpg_model_initialize. Model > is set of callbacks. With this function, future additions of models > should be possible without changing the ABI. > > Patch also contains callback in CPG_MODEL_V1 for notification about > Totem membership changes. I've been doing extensive testing with this patch, and it's working well (2010-04-08-cpg_model+totem_cb.patch); ack from me on going ahead with it. Dave ___ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais
Re: [Openais] It is possible to use the same multicast address on a network with the openais services configured for different UDP ports
What about redundant ring configuration? Can you use the same multicast ip in different rings? Sincerely yours, Vadym Chepkov --- On Mon, 4/19/10, Steven Dake wrote: > From: Steven Dake > Subject: Re: [Openais] It is possible to use the same multicast address on a > network with the openais services configured for different UDP ports > To: "manik gaur" > Cc: open...@lists.osdl.org > Date: Monday, April 19, 2010, 12:31 PM > On Thu, 2010-04-15 at 18:36 +0530, > manik gaur wrote: > > I am trying to configure openais as a different > service uses different > > ports. It is written in the openais.conf man page. > > > > “It is possible to use the same multicast address on > a network with > > the openais services configured for different UDP > ports” > > > > but as I am new to the openais I don’t know where to > start. > > > > > > You can set two unique clusters by using the same multicast > address for > both clusters, but setting the "port" field in the config > file > differently. (at least 2 ports higher or lower). > > Regards > -steve > > > > If any body has the solution please let me know. > > > > > > ___ > > Openais mailing list > > Openais@lists.linux-foundation.org > > https://lists.linux-foundation.org/mailman/listinfo/openais > > ___ > Openais mailing list > Openais@lists.linux-foundation.org > https://lists.linux-foundation.org/mailman/listinfo/openais ___ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais
Re: [Openais] corosync - CPG model_init + callback with totem ringid and members
good for merge On Thu, 2010-04-08 at 16:57 +0200, Jan Friesse wrote: > Included is patch solving 2nd problem. > > In first problem, I agree with Chrissie, and really don't have any > single idea how to make regular confchg precede totem_confchg. > > Christine Caulfield wrote: > > On 07/04/10 20:32, David Teigland wrote: > >> On Tue, Apr 06, 2010 at 02:05:00PM +0200, Jan Friesse wrote: > >>> Same patch but rebased on top of Steve's change (today trunk). > >> > >> Thanks, this is mostly working well, but I've found one problem, and one > >> additional thing I need (mentioned on irc already): > >> > >> 1. When a node joins, I get the totem callback before the corresponding > >> confchg callback. When a node leaves I get them in the expected order: > >> confchg followed by totem callback. > > > > > > That *is* the expected order, as far as CPG is concerned anyway. The > > process is node deemed to be a member of the group until all nodes have > > seen its join message. it also makes more logical sense because the node > > has to join the cluster before the process joins the group. > > > > > >> 2. When my app starts up it needs to be able to get the current ring id, > >> so we need to be able to get/force an initial totem callback after a > >> cpg_join that indicates the current ring id. 
> >> > >> > >> I've also had a problem getting the current sequence number through > >> libcman/cman_get_cluster()/ci_generation --- > >> > >> On node 2 I see: > >> > >> in cman_dispatch statechange callback: > >>call cman_get_cluster(), get generation 2124 > >>call cman_get_nodes(), see node 1 removed > >> > >> in cman_dispatch statechange callback: > >>call cman_get_cluster(), get generation 2128 > >>call cman_get_nodes(), see node 1 added > >> > >> in cman_dispatch statechange callback: > >>call cman_get_cluster(), get generation 2128 (expect 2132) > >>call cman_get_nodes(), see node 1 removed > >> > >> in cman_dispatch statechange callback: > >>call cman_get_cluster(), get generation 2136 > >>call cman_get_nodes(), see node 1 added > >> > >> The second time node 1 is removed I get the previous generation when > >> node 1 was added instead of generation 2132 which the callback is for. > >> > >> On node 4 I do get generation 2132 in that callback as expected. So it > >> seems like it could be a race, I've only gone through this test once. > >> > > > > There is almost certainly a race there. The ring IDs need to be > > delivered at the same time as the change notifications. > > > > Chrissie, > is that problem in cman or in my patch? > > > Chrissie > > > > Regards, > Honza > plain text document attachment (2010-04-08-cpg_model+totem_cb.patch) > commit 0d509f4bf23f618c940c3bcdd7cf0e97faf64876 > Author: Jan Friesse > Date: Thu Apr 8 16:48:45 2010 +0200 > > CPG model_initialize and ringid + members callback > > Patch adds new function to initialize cpg, cpg_model_initialize. Model > is set of callbacks. With this function, future addions of models > should be possible without changing the ABI. > > Patch also contains callback in CPG_MODEL_V1 for notification about > Totem membership changes. 
> > diff --git a/trunk/include/corosync/cpg.h b/trunk/include/corosync/cpg.h > index b5609df..6189eb5 100644 > --- a/trunk/include/corosync/cpg.h > +++ b/trunk/include/corosync/cpg.h > @@ -78,6 +78,10 @@ typedef enum { > CPG_ITERATION_ALL = 3, > } cpg_iteration_type_t; > > +typedef enum { > + CPG_MODEL_V1 = 1, > +} cpg_model_t; > + > struct cpg_address { > uint32_t nodeid; > uint32_t pid; > @@ -98,6 +102,11 @@ struct cpg_iteration_description_t { > uint32_t pid; > }; > > +struct cpg_ring_id { > + uint32_t nodeid; > + uint64_t seq; > +}; > + > typedef void (*cpg_deliver_fn_t) ( > cpg_handle_t handle, > const struct cpg_name *group_name, > @@ -117,11 +126,32 @@ typedef void (*cpg_confchg_fn_t) ( > const struct cpg_address *left_list, size_t left_list_entries, > const struct cpg_address *joined_list, size_t joined_list_entries); > > +typedef void (*cpg_totem_confchg_fn_t) ( > + cpg_handle_t handle, > + struct cpg_ring_id ring_id, > + uint32_t member_list_entries, > + const uint32_t *member_list); > + > typedef struct { > cpg_deliver_fn_t cpg_deliver_fn; > cpg_confchg_fn_t cpg_confchg_fn; > } cpg_callbacks_t; > > +typedef struct { > + cpg_model_t model; > +} cpg_model_data_t; > + > +#define CPG_MODEL_V1_DELIVER_INITIAL_TOTEM_CONF 0x01 > + > +typedef struct { > + cpg_model_t model; > + cpg_deliver_fn_t cpg_deliver_fn; > + cpg_confchg_fn_t cpg_confchg_fn; > + cpg_totem_confchg_fn_t cpg_totem_confchg_fn; > + unsigned int flags; > +} cpg_model_v1_data_t; > + > + > /** @} */ > > /* > @@ -132,6 +162,15 @@ cs_error_t cpg_initialize ( > cpg_callbacks_t *callbacks); > > /* > + * Create a new cpg connection, initialize with model > +
Re: [Openais] [corosync] cpg_model_initialize - Man page
good for merge although I think this was in your final cpg_model_initialize patch regards -steve On Wed, 2010-04-07 at 15:02 +0200, Jan Friesse wrote: > SUBJ > > Regards, > Honza > plain text document attachment > (2010-04-07-cpg_model_initialize-manpage.patch) > commit ffc84d55cb7a6b207da6e131f0875328f76f631d > Author: Jan Friesse > Date: Wed Apr 7 15:00:19 2010 +0200 > > cpg_model_initialize man page > > diff --git a/trunk/man/Makefile.am b/trunk/man/Makefile.am > index da01c2e..fe8f71b 100644 > --- a/trunk/man/Makefile.am > +++ b/trunk/man/Makefile.am > @@ -71,6 +71,7 @@ dist_man_MANS = \ > cpg_leave.3 \ > cpg_local_get.3 \ > cpg_mcast_joined.3 \ > + cpg_model_initialize.3 \ > cpg_zcb_mcast_joined.3 \ > cpg_zcb_alloc.3 \ > cpg_zcb_free.3 \ > diff --git a/trunk/man/cpg_initialize.3 b/trunk/man/cpg_initialize.3 > index ce6e25a..6d4bc51 100644 > --- a/trunk/man/cpg_initialize.3 > +++ b/trunk/man/cpg_initialize.3 > @@ -41,7 +41,10 @@ cpg_initialize \- Create a new connection to the CPG > service > .SH DESCRIPTION > The > .B cpg_initialize > -function is used to initialize a connection to the closed process groups API. > +function is used to initialize a connection to the closed process groups > API. This function is deprecated > +and > +.B cpg_model_initialize > +should be used in newly written code. > .PP > Each application may have several connections to the CPG API. Each > application > uses the > @@ -167,5 +170,6 @@ The errors are undocumented. > .BR cpg_context_get (3) > .BR cpg_context_set (3) > .BR cpg_local_get (3) > +.BR cpg_model_initialize (3) > > .PP > diff --git a/trunk/man/cpg_model_initialize.3 > b/trunk/man/cpg_model_initialize.3 > new file mode 100644 > index 000..8ecf810 > --- /dev/null > +++ b/trunk/man/cpg_model_initialize.3 > @@ -0,0 +1,227 @@ > +.\"/* > +.\" * Copyright (c) 2010 Red Hat, Inc. > +.\" * > +.\" * All rights reserved. 
> +.\" * > +.\" * Author: Jan Friesse > +.\" * Author: Christine Caulfield > +.\" * > +.\" * This software licensed under BSD license, the text of which follows: > +.\" * > +.\" * Redistribution and use in source and binary forms, with or without > +.\" * modification, are permitted provided that the following conditions are > met: > +.\" * > +.\" * - Redistributions of source code must retain the above copyright > notice, > +.\" * this list of conditions and the following disclaimer. > +.\" * - Redistributions in binary form must reproduce the above copyright > notice, > +.\" * this list of conditions and the following disclaimer in the > documentation > +.\" * and/or other materials provided with the distribution. > +.\" * - Neither the name of the MontaVista Software, Inc. nor the names of > its > +.\" * contributors may be used to endorse or promote products derived from > this > +.\" * software without specific prior written permission. > +.\" * > +.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > "AS IS" > +.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, > THE > +.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR > PURPOSE > +.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS > BE > +.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR > +.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF > +.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR > BUSINESS > +.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN > +.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) > +.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF > +.\" * THE POSSIBILITY OF SUCH DAMAGE. 
> +.\" */ > +.TH CPG_MODEL_INITIALIZE 3 2010-04-07 "corosync Man Page" "Corosync Cluster > Engine Programmer's Manual" > +.SH NAME > +cpg_model_initialize \- Create a new connection to the CPG service > +.SH SYNOPSIS > +.B #include > +.sp > +.BI "cs_error_t cpg_model_initialize(cpg_handle_t *" handle ", cpg_model_t " > model ", cpg_model_data_t *" model_data ", void *" context "); > + > +.SH DESCRIPTION > +The > +.B cpg_model_initialize > +function is used to initialize a connection to the closed process groups API. > +.PP > +Each application may have several connections to the CPG API. Each > application > +uses the > +.I handle > +argument to uniquely identify the connection. The > +.I handle > +argument is then used in other function calls to identify the connection to > be used > +for communication with the CPG service. > +.PP > +Argument > +.I model > +is used to explicitly choose set of callbacks and internal parameters. > Currently only model > +.I CPG_MODEL_V1 > +is defined. > +.PP > +Callbacks and internal parameters are passed by > +.I model_data > +argument. This is casted pointer (idea is similar as in sockaddr function) > to one of structures > +
Re: [Openais] corosync trunk - proper linking of confdb
good for merge On Thu, 2010-04-08 at 17:55 +0200, Jan Friesse wrote: > Attached patch solves problem with linking of confdb (linked without -ldl). > > Regards, > Honza > plain text document attachment (2010-04-08-libconfdb-linking.patch) > commit 4f18594441ffb58495832c40d828f8e31bacea87 > Author: Jan Friesse > Date: Thu Apr 8 17:46:43 2010 +0200 > > Fix confdb linking > > diff --git a/trunk/lib/Makefile.am b/trunk/lib/Makefile.am > index 7e4974e..2a011e6 100644 > --- a/trunk/lib/Makefile.am > +++ b/trunk/lib/Makefile.am > @@ -36,6 +36,7 @@ get_soname=$(if $($(call uc,$1)_SONAME),$($(call > uc,$1)_SONAME),$(SONAME)) > get_major=$(firstword $(subst ., ,$(call get_soname,$1))) > get_sharedlibs=$(foreach lib,$(SHARED_LIBS_SO:lib%.so=%),lib$(lib).so.$(call > get_soname,$(lib))) > get_sharedlibs_two=$(foreach > lib,$(SHARED_LIBS_SO:lib%.so=%),lib$(lib).so.$(call get_major,$(lib))) > +get_linker_add=$(if $($(call uc,$1)_LINKER_ADD),$($(call uc,$1)_LINKER_ADD)) > > MAINTAINERCLEANFILES= Makefile.in > > @@ -57,6 +58,7 @@ libquorum_a_SOURCES = quorum.c > libvotequorum_a_SOURCES = votequorum.c > libconfdb_a_SOURCES = confdb.c sa-confdb.c > libconfdb_a_LIBADD = ../lcr/lcr_ifact.o > +CONFDB_LINKER_ADD= $(OS_DYFLAGS) $(OS_LDL) > libcoroipcc_a_SOURCES= coroipcc.c > libsam_a_SOURCES = sam.c > > @@ -78,7 +80,7 @@ libcoroipcc.so.$(SONAME): coroipcc.o > ln -sf libcoroipcc.so.$(SONAME) libcoroipcc.so.$(SOMAJOR) > > lib%.so: lib%.a libcoroipcc.so.$(SONAME) > - $(CC) $(DARWIN_OPTS) -Wl,-whole-archive $^ -Wl,-no-whole-archive -o $@ > + $(CC) $(DARWIN_OPTS) $(call get_linker_add,$*) -Wl,-whole-archive $^ > -Wl,-no-whole-archive -o $@ > ln -sf lib$*.so.$(call get_soname,$*) lib$*.so > ln -sf lib$*.so.$(call get_soname,$*) lib$*.so.$(call get_major,$*) > > @@ -92,7 +94,7 @@ libcoroipcc.so.$(SONAME): coroipcc.o > ln -sf libcoroipcc.so.$(SONAME) libcoroipcc.so.$(SOMAJOR) > > lib%.so.$(SONAME): lib%.a libcoroipcc.so.$(SONAME) > - $(LD) $(SOLARIS_OPTS) -G -whole-archive $^ -no-whole-archive -o 
$@ > + $(LD) $(SOLARIS_OPTS) $(call get_linker_add,$*) -G -whole-archive $^ > -no-whole-archive -o $@ > ln -sf lib$*.so.$(call get_soname,$*) lib$*.so > ln -sf lib$*.so.$(call get_soname,$*) lib$*.so.$(call get_major,$*) > > @@ -110,7 +112,7 @@ lib%.so: lib%.a libcoroipcc.so.$(SONAME) > $(CC) -shared -o $@.$(call get_soname,$*) \ > -Wl,-soname=lib$*.so.$(call get_major,$*) \ > -Wl,-version-script=$(srcdir)/lib$*.versions \ > - -Wl,-whole-archive $^ -Wl,-no-whole-archive $(LDFLAGS) > $(AM_LDFLAGS) > + -Wl,-whole-archive $^ -Wl,-no-whole-archive $(LDFLAGS) > $(AM_LDFLAGS) $(call get_linker_add,$*) > ln -sf lib$*.so.$(call get_soname,$*) lib$*.so > ln -sf lib$*.so.$(call get_soname,$*) lib$*.so.$(call get_major,$*) > > ___ > Openais mailing list > Openais@lists.linux-foundation.org > https://lists.linux-foundation.org/mailman/listinfo/openais ___ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais
Re: [Openais] corosync - Support for store user data in SAM
good for merge On Thu, 2010-04-15 at 14:22 +0200, Jan Friesse wrote: > Support for store user data in SAM > > Ability to in-memory storing of user data which survives between > instances of process. > > Also ability needed ability for bi-directional communication between > child and parent is added. > > Regards, > Honza > plain text document attachment (2010-04-15-sam-user-data-store.patch) > commit d6f2002ec4ce9384859e0952bffc72d677df9576 > Author: Jan Friesse > Date: Thu Apr 15 14:18:53 2010 +0200 > > Support for store user data in SAM > > Ability to in-memory storing of user data which survives between > instances of process. > > Also ability needed ability for bi-directional communication between > child and parent is added. > > diff --git a/trunk/include/corosync/sam.h b/trunk/include/corosync/sam.h > index bf6b069..4e60e17 100644 > --- a/trunk/include/corosync/sam.h > +++ b/trunk/include/corosync/sam.h > @@ -160,6 +160,49 @@ cs_error_t sam_hc_send (void); > */ > cs_error_t sam_hc_callback_register (sam_hc_callback_t cb); > > +/* > + * Return size of stored data. > + * @param size Pointer to variable, where stored data size is returned. If > + * nothing or NULL is stored, then 0 is returned. > + * @return > + * - CS_OK in case no problem appeared > + * - CS_ERR_BAD_HANDLE in case you call this function before sam_init or > after > + * sam_finalize > + * - CS_ERR_INVALID_PARAM if size parameter is NULL > + */ > +cs_error_t sam_data_getsize (size_t *size); > + > +/* > + * Return stored data. > + * @param data Pointer to place, where to store data > + * @param size Allocated size of data > + * @return > + * - CS_OK if no problem appeared > + * - CS_ERR_BAD_HANDLE if you call this function before sam_init or after > sam_finalize > + * - CS_ERR_INVALID_PARAM if data is NULL or size is less then currently > saved user data length > + */ > +cs_error_t sam_data_restore ( > + void *data, > + size_t size); > + > +/* > + * Store user data. 
Such stored data survives restart of child. > + * @param data Data to store. You can use NULL to delete data > + * @param size Size of data to store. > + * @return > + * - CS_OK in case no problem appeared > + * - CS_ERR_BAD_HANDLE if you call this function before sam_init or > + * after sam_finalize > + * - CS_ERR_NO_MEMORY if data is too large and malloc/realloc was not > + * sucesfull > + * - CS_ERR_LIBRARY if some internal error appeared (communication with > parent > + * process) > + */ > +cs_error_t sam_data_store ( > + const void *data, > + size_t size); > + > + > #ifdef __cplusplus > } > #endif > diff --git a/trunk/lib/libsam.verso b/trunk/lib/libsam.verso > index ee74734..6aba2b2 100644 > --- a/trunk/lib/libsam.verso > +++ b/trunk/lib/libsam.verso > @@ -1 +1 @@ > -4.1.0 > +4.2.0 > diff --git a/trunk/lib/sam.c b/trunk/lib/sam.c > index 9487afd..207d4f9 100644 > --- a/trunk/lib/sam.c > +++ b/trunk/lib/sam.c > @@ -38,6 +38,7 @@ > > #include > > +#include > #include > #include > #include > @@ -70,7 +71,13 @@ enum sam_internal_status_t { > enum sam_command_t { > SAM_COMMAND_START, > SAM_COMMAND_STOP, > - SAM_COMMAND_HB > + SAM_COMMAND_HB, > + SAM_COMMAND_DATA_STORE, > +}; > + > +enum sam_reply_t { > + SAM_REPLY_OK, > + SAM_REPLY_ERROR, > }; > > enum sam_parent_action_t { > @@ -85,14 +92,20 @@ static struct { > sam_recovery_policy_t recovery_policy; > enum sam_internal_status_t internal_status; > unsigned int instance_id; > - int parent_fd; > + int child_fd_out; > + int child_fd_in; > int term_send; > int warn_signal; > + int am_i_child; > > sam_hc_callback_t hc_callback; > pthread_t cb_thread; > int cb_rpipe_fd, cb_wpipe_fd; > int cb_registered; > + > + void *user_data; > + size_t user_data_size; > + size_t user_data_allocated; > } sam_internal_data; > > cs_error_t sam_initialize ( > @@ -115,6 +128,12 @@ cs_error_t sam_initialize ( > > sam_internal_data.warn_signal = SIGTERM; > > + sam_internal_data.am_i_child = 0; > + > + sam_internal_data.user_data = NULL; > 
+ sam_internal_data.user_data_size = 0; > + sam_internal_data.user_data_allocated = 0; > + > return (CS_OK); > } > > @@ -132,7 +151,8 @@ static size_t sam_safe_write ( > bytes_write = 0; > > do { > - tmp_bytes_write = write (d, (const char *)buf + bytes_write, > nbyte - bytes_write); > + tmp_bytes_write = write (d, (const char *)buf + bytes_write, > + (nbyte - bytes_write > SSIZE_MAX) ? SSIZE_MAX : nbyte - > bytes_write); > > if (tmp_bytes_write == -1) { > if (!(errno == EAGAIN || errno == EINTR)) > @@ -142,7 +162,176 @@ static size_t sam_safe_write ( > } > } while (bytes_write != nbyte); > > - return bytes_write; > + return (bytes_write); > +} >
Re: [Openais] It is possible to use the same multicast address on a network with the openais services configured for different UDP ports
On Thu, 2010-04-15 at 18:36 +0530, manik gaur wrote: > I am trying to configure openais as a different service uses different > ports. It is written in the openais.conf man page. > > “It is possible to use the same multicast address on a network with > the openais services configured for different UDP ports” > > but as I am new to the openais I don’t know where to start. > > You can set two unique clusters by using the same multicast address for both clusters, but setting the "port" field in the config file differently. (at least 2 ports higher or lower). Regards -steve > If any body has the solution please let me know. > > > ___ > Openais mailing list > Openais@lists.linux-foundation.org > https://lists.linux-foundation.org/mailman/listinfo/openais ___ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais
Re: [Openais] [PATCH corosync] Fix crash in YKD
good for merge regards -steve On Wed, 2010-04-14 at 11:10 +0100, Christine Caulfield wrote: > This patch fixes an assert in YKD where an ATTEMPT message is received > while the state machine is in SENDSTATE. I have no idea if it's > theoretically correct or not, but it works on my 16 node cluster. > > It also fixes a send routine so it puts the right type of message in the > header (now that we're looking at it, that matters!) > > Chrissie > ___ > Openais mailing list > Openais@lists.linux-foundation.org > https://lists.linux-foundation.org/mailman/listinfo/openais ___ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais
Re: [Openais] single node config
On Wed, 2010-04-14 at 16:58 -0700, Alan Jones wrote: > Hi, > I'd like to run Pacemaker in a single node configuration with a > network. > My thinking is to run with corosync using the loopback (localhost) > subnet. > Does this make sense? Is there a better way? This may work. Corosync is designed to work single node via loopback, but YMMV ;-) Regards -steve > Thanks! > Alan > > ___ > Openais mailing list > Openais@lists.linux-foundation.org > https://lists.linux-foundation.org/mailman/listinfo/openais ___ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais
Re: [Openais] Quick Firewall Configuration Questions
On Mon, 2010-04-19 at 10:51 -0400, Jake Bogie wrote: > Hello Everyone, > > > > Quick question…what ports/protocol does OpenAIS utilize for > communications? > > > > I know they are configurable however what is the most common > configuration settings people are using? > > > > Thanks! > > The protocol used is called the Totem Single Ring Protocol. ports 5404(udp) and 5405(udp) are generally most often used. > > Jake > > > ___ > Openais mailing list > Openais@lists.linux-foundation.org > https://lists.linux-foundation.org/mailman/listinfo/openais ___ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais
[Openais] Quick Firewall Configuration Questions
Hello Everyone, Quick question...what ports/protocol does OpenAIS utilize for communications? I know they are configurable however what is the most common configuration settings people are using? Thanks! Jake ___ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais
Re: [Openais] Failover constraint problem
Now I restarted again node0 and apache-group migrated to node1 (as expected) nsf_client did not start on node1 as expected. When node0 comes up again the apache-group migrated to node0 and nfs_client started on node1 (as expected). I keep continue testing and will feed back about the results. Thank you for your efforts! Sándor Fehér írta: > Andrew Beekhof írta: >> 2010/4/19 Sándor Fehér : >> >>> Hi, >>> >>> I changed the config as you suggested: >>> --- >>> colocation apache-group-on-ms-drbd0 inf: apache-group ms-drbd0:Master >>> colocation co_nfs_client -inf: nfs_client ms-drbd0:Master >>> order ms-drbd0-before-apache-group inf: ms-drbd0:promote apache-group:start >>> --- >>> >>> Now I get this: >>> >> >> When you do what? >> > changed the config, commit then rebooted both nodes. >> Make the change? Repeat the test? Something else? >> > Saw the wrong result after reboot then crm_resource --resource > nfs_client --cleanup. > I got the same result. >> >>> Online: [ node0 node1 ] >>> >>> Resource Group: apache-group >>> fs0(ocf::heartbeat:Filesystem):Started node0 >>> virtual-ip (ocf::heartbeat:IPaddr2): Started node0 >>> nfs_server (lsb:nfs-kernel-server):Started node0 >>> Master/Slave Set: ms-drbd0 >>> Masters: [ node0 ] >>> Slaves: [ node1 ] >>> nfs_client (ocf::heartbeat:Filesystem):Started node0 (unmanaged) >>> FAILED >>> >>> Failed actions: >>> nfs_client_stop_0 (node=node0, call=21, rc=1, status=complete): unknown >>> error >>> node1:~# >>> >>> Here is the relevant part of daemon.log http://pastebin.com/L9scU4fy >>> >>> Thank you ! >>> >>> Andrew Beekhof írta: >>> >>> On Sat, Apr 17, 2010 at 12:21 AM, Sandor Feher wrote: >>> >>> >>> Hi, >>> >>> First of all my goal is to set up a two-node cluster with pacemaker to >>> serve our webhosting service. >>> This config sites on two vmware virtual machines for testing purposes >>> now. Both of them runs Debian Lenny. 
>>> >>> Here are the basic rules I set up: >>> >>> node0 has >>> >>> virtual ip >>> drbd primary filesystem mounted under /mnt >>> nfs server offers /mnt mount point to node1 >>> >>> node1 >>> >>> drbd secondary node >>> nfs_client mounts node0's /mnt dir and it should be rw for both nodes >>> >>> If node0 fails then node1 will act as primary drbd node, take over >>> virtual ip and mount drbd partition under /mnt dir and will not start >>> nfs_client resource because it makes no sense (nfs_client should be take >>> down before drbd partition get mounted under /mnt). >>> If node1 fails the nothing should be happen because nfs_client only run >>> node which has secondary drbd partition >>> >>> So my problems are the following. >>> >>> 1. If I migrate apache-group resorce to another node then nfs_client >>> won't release the /mnt mount point (I know according to this config it >>> should not). >>> I think I need some clever constraint to achieve this. >>> >>> >>> Perhaps instead of: >>>colocation co_nfs_client inf: nfs_client ms-drbd0:Slave >>> try: >>>colocation co_nfs_client -inf: nfs_client ms-drbd0:Master >>> >>> >>> >>> >>> 2. If I shot down node1 (suppose that node0 the master at the moment and >>> runs apache-group) then nothing happens as expected but if node1 comes >>> online again the apache-group start to migrate to node1. I don't >>> understand why >>> >>> >>> because you told it to: >>>location cli-prefer-apache-group apache-group \ >>> rule $id="cli-prefer-rule-apache-group" inf: #uname eq node0 >>> >>> Change inf to (for example) 1000 >>> >>> >>> >>> because there is a constraint for this to get >>> apache-group run on node which primary drbd resource and in this >>> situation node0 is. 
>>> >>> >>> crm configure show >>> >>> node node0 \ >>>attributes standby="off" >>> node node1 \ >>>attributes standby="off" >>> primitive drbd0 ocf:heartbeat:drbd \ >>>params drbd_resource="r0" \ >>>op monitor interval="59s" role="Master" timeout="30s" \ >>>op monitor interval="60s" role="Slave" timeout="30s" >>> primitive fs0 ocf:heartbeat:Filesystem \ >>>params fstype="ext3" directory="/mnt" device="/dev/drbd0" \ >>>meta target-role="Started" >>> primitive nfs_client ocf:heartbeat:Filesystem \ >>>params fstype="nfs" directory="/mnt/" >>> device="192.168.1.40:/mnt/" >>> options="hard,intr,noatime,rw,nolock,tcp,timeo=50" \ >>>meta target-role="Stopped" >>> primitive nfs_server lsb:nfs-kernel-server \ >>>op monitor interval="1min" >>> primitive virtual-ip ocf:heartbeat:IPaddr2 \ >>>params ip="192.168.1.40" broadcast="192.168.1.255" nic="eth0" >>> cidr_netmask="24" \ >>>op monitor interval="21s" timeout="5s" target-role="Started" >>> group apache-group fs0 virtual-ip nfs_server \ >>>meta target-role="Started" >>> ms ms-drbd0 drbd0 \ >>>meta clone-max="2" notify="true" globally-unique="false" >>> target-role="Started" >>> location
Re: [Openais] Failover constraint problem
Hi, I changed the config as you suggested: --- colocation apache-group-on-ms-drbd0 inf: apache-group ms-drbd0:Master *colocation co_nfs_client -inf: nfs_client ms-drbd0:Master* order ms-drbd0-before-apache-group inf: ms-drbd0:promote apache-group:start --- Now I get this: Online: [ node0 node1 ] Resource Group: apache-group fs0(ocf::heartbeat:Filesystem):Started node0 virtual-ip (ocf::heartbeat:IPaddr2): Started node0 nfs_server (lsb:nfs-kernel-server):Started node0 Master/Slave Set: ms-drbd0 Masters: [ node0 ] Slaves: [ node1 ] nfs_client (ocf::heartbeat:Filesystem):Started node0 (unmanaged) FAILED Failed actions: nfs_client_stop_0 (node=node0, call=21, rc=1, status=complete): unknown error node1:~# Here is the relevant part of daemon.log http://pastebin.com/L9scU4fy Thank you ! Andrew Beekhof írta: > On Sat, Apr 17, 2010 at 12:21 AM, Sandor Feher wrote: > >> Hi, >> >> First of all my goal is to set up a two-node cluster with pacemaker to >> serve our webhosting service. >> This config sites on two vmware virtual machines for testing purposes >> now. Both of them runs Debian Lenny. >> >> Here are the basic rules I set up: >> >> node0 has >> >> virtual ip >> drbd primary filesystem mounted under /mnt >> nfs server offers /mnt mount point to node1 >> >> node1 >> >> drbd secondary node >> nfs_client mounts node0's /mnt dir and it should be rw for both nodes >> >> If node0 fails then node1 will act as primary drbd node, take over >> virtual ip and mount drbd partition under /mnt dir and will not start >> nfs_client resource because it makes no sense (nfs_client should be take >> down before drbd partition get mounted under /mnt). >> If node1 fails the nothing should be happen because nfs_client only run >> node which has secondary drbd partition >> >> So my problems are the following. >> >> 1. If I migrate apache-group resorce to another node then nfs_client >> won't release the /mnt mount point (I know according to this config it >> should not). 
>> I think I need some clever constraint to achieve this. >> > > Perhaps instead of: >colocation co_nfs_client inf: nfs_client ms-drbd0:Slave > try: >colocation co_nfs_client -inf: nfs_client ms-drbd0:Master > > > >> 2. If I shot down node1 (suppose that node0 the master at the moment and >> runs apache-group) then nothing happens as expected but if node1 comes >> online again the apache-group start to migrate to node1. I don't >> understand why >> > > because you told it to: >location cli-prefer-apache-group apache-group \ > rule $id="cli-prefer-rule-apache-group" inf: #uname eq node0 > > Change inf to (for example) 1000 > > >> because there is a constraint for this to get >> apache-group run on node which primary drbd resource and in this >> situation node0 is. >> >> >> crm configure show >> >> node node0 \ >>attributes standby="off" >> node node1 \ >>attributes standby="off" >> primitive drbd0 ocf:heartbeat:drbd \ >>params drbd_resource="r0" \ >>op monitor interval="59s" role="Master" timeout="30s" \ >>op monitor interval="60s" role="Slave" timeout="30s" >> primitive fs0 ocf:heartbeat:Filesystem \ >>params fstype="ext3" directory="/mnt" device="/dev/drbd0" \ >>meta target-role="Started" >> primitive nfs_client ocf:heartbeat:Filesystem \ >>params fstype="nfs" directory="/mnt/" >> device="192.168.1.40:/mnt/" >> options="hard,intr,noatime,rw,nolock,tcp,timeo=50" \ >>meta target-role="Stopped" >> primitive nfs_server lsb:nfs-kernel-server \ >>op monitor interval="1min" >> primitive virtual-ip ocf:heartbeat:IPaddr2 \ >>params ip="192.168.1.40" broadcast="192.168.1.255" nic="eth0" >> cidr_netmask="24" \ >>op monitor interval="21s" timeout="5s" target-role="Started" >> group apache-group fs0 virtual-ip nfs_server \ >>meta target-role="Started" >> ms ms-drbd0 drbd0 \ >>meta clone-max="2" notify="true" globally-unique="false" >> target-role="Started" >> location cli-prefer-apache-group apache-group \ >>rule $id="cli-prefer-rule-apache-group" inf: #uname eq node0 >> 
colocation apache-group-on-ms-drbd0 inf: apache-group ms-drbd0:Master >> colocation co_nfs_client inf: nfs_client ms-drbd0:Slave >> order ms-drbd0-before-apache-group inf: ms-drbd0:promote apache-group:start >> order ms-drbd0-before-nfs_client inf: ms-drbd0:promote nfs_client:start >> property $id="cib-bootstrap-options" \ >>dc-version="1.0.8-2c98138c2f070fcb6ddeab1084154cffbf44ba75" \ >>cluster-infrastructure="openais" \ >>stonith-enabled="false" \ >>no-quorum-policy="ignore" \ >>expected-quorum-votes="2" \ >>last-lrm-refresh="1271453094" >> >> node1:~# crm_mon -1 >> >> Last updated: Fri Apr 16 23:49:30 2010 >> Stack: openais >> Current DC: node0 - partition with quorum >> Version: 1.0.8-2c98138c2f070fcb6ddeab1084
[Openais] Please remove me from this list.
___ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais
Re: [Openais] Failover constraint problem
Andrew Beekhof írta: > 2010/4/19 Sándor Fehér : > >> Hi, >> >> I changed the config as you suggested: >> --- >> colocation apache-group-on-ms-drbd0 inf: apache-group ms-drbd0:Master >> colocation co_nfs_client -inf: nfs_client ms-drbd0:Master >> order ms-drbd0-before-apache-group inf: ms-drbd0:promote apache-group:start >> --- >> >> Now I get this: >> > > When you do what? > changed the config, commit then rebooted both nodes. > Make the change? Repeat the test? Something else? > Saw the wrong result after reboot then crm_resource --resource nfs_client --cleanup. I got the same result. > >> Online: [ node0 node1 ] >> >> Resource Group: apache-group >> fs0(ocf::heartbeat:Filesystem):Started node0 >> virtual-ip (ocf::heartbeat:IPaddr2): Started node0 >> nfs_server (lsb:nfs-kernel-server):Started node0 >> Master/Slave Set: ms-drbd0 >> Masters: [ node0 ] >> Slaves: [ node1 ] >> nfs_client (ocf::heartbeat:Filesystem):Started node0 (unmanaged) >> FAILED >> >> Failed actions: >> nfs_client_stop_0 (node=node0, call=21, rc=1, status=complete): unknown >> error >> node1:~# >> >> Here is the relevant part of daemon.log http://pastebin.com/L9scU4fy >> >> Thank you ! >> >> Andrew Beekhof írta: >> >> On Sat, Apr 17, 2010 at 12:21 AM, Sandor Feher wrote: >> >> >> Hi, >> >> First of all my goal is to set up a two-node cluster with pacemaker to >> serve our webhosting service. >> This config sites on two vmware virtual machines for testing purposes >> now. Both of them runs Debian Lenny. 
>> >> Here are the basic rules I set up: >> >> node0 has >> >> virtual ip >> drbd primary filesystem mounted under /mnt >> nfs server offers /mnt mount point to node1 >> >> node1 >> >> drbd secondary node >> nfs_client mounts node0's /mnt dir and it should be rw for both nodes >> >> If node0 fails then node1 will act as primary drbd node, take over >> virtual ip and mount drbd partition under /mnt dir and will not start >> nfs_client resource because it makes no sense (nfs_client should be take >> down before drbd partition get mounted under /mnt). >> If node1 fails the nothing should be happen because nfs_client only run >> node which has secondary drbd partition >> >> So my problems are the following. >> >> 1. If I migrate apache-group resorce to another node then nfs_client >> won't release the /mnt mount point (I know according to this config it >> should not). >> I think I need some clever constraint to achieve this. >> >> >> Perhaps instead of: >>colocation co_nfs_client inf: nfs_client ms-drbd0:Slave >> try: >>colocation co_nfs_client -inf: nfs_client ms-drbd0:Master >> >> >> >> >> 2. If I shot down node1 (suppose that node0 the master at the moment and >> runs apache-group) then nothing happens as expected but if node1 comes >> online again the apache-group start to migrate to node1. I don't >> understand why >> >> >> because you told it to: >>location cli-prefer-apache-group apache-group \ >> rule $id="cli-prefer-rule-apache-group" inf: #uname eq node0 >> >> Change inf to (for example) 1000 >> >> >> >> because there is a constraint for this to get >> apache-group run on node which primary drbd resource and in this >> situation node0 is. 
>> >> >> crm configure show >> >> node node0 \ >>attributes standby="off" >> node node1 \ >>attributes standby="off" >> primitive drbd0 ocf:heartbeat:drbd \ >>params drbd_resource="r0" \ >>op monitor interval="59s" role="Master" timeout="30s" \ >>op monitor interval="60s" role="Slave" timeout="30s" >> primitive fs0 ocf:heartbeat:Filesystem \ >>params fstype="ext3" directory="/mnt" device="/dev/drbd0" \ >>meta target-role="Started" >> primitive nfs_client ocf:heartbeat:Filesystem \ >>params fstype="nfs" directory="/mnt/" >> device="192.168.1.40:/mnt/" >> options="hard,intr,noatime,rw,nolock,tcp,timeo=50" \ >>meta target-role="Stopped" >> primitive nfs_server lsb:nfs-kernel-server \ >>op monitor interval="1min" >> primitive virtual-ip ocf:heartbeat:IPaddr2 \ >>params ip="192.168.1.40" broadcast="192.168.1.255" nic="eth0" >> cidr_netmask="24" \ >>op monitor interval="21s" timeout="5s" target-role="Started" >> group apache-group fs0 virtual-ip nfs_server \ >>meta target-role="Started" >> ms ms-drbd0 drbd0 \ >>meta clone-max="2" notify="true" globally-unique="false" >> target-role="Started" >> location cli-prefer-apache-group apache-group \ >>rule $id="cli-prefer-rule-apache-group" inf: #uname eq node0 >> colocation apache-group-on-ms-drbd0 inf: apache-group ms-drbd0:Master >> colocation co_nfs_client inf: nfs_client ms-drbd0:Slave >> order ms-drbd0-before-apache-group inf: ms-drbd0:promote apache-group:start >> order ms-drbd0-before-nfs_client inf: ms-drbd0:promote nfs_client:start >> property $id="cib-bootstrap-options" \ >>dc-version="1.0.8-2c98138c2f070fcb6ddeab1
Re: [Openais] Failover constraint problem
2010/4/19 Sándor Fehér : > Hi, > > I changed the config as you suggested: > --- > colocation apache-group-on-ms-drbd0 inf: apache-group ms-drbd0:Master > colocation co_nfs_client -inf: nfs_client ms-drbd0:Master > order ms-drbd0-before-apache-group inf: ms-drbd0:promote apache-group:start > --- > > Now I get this: When you do what? Make the change? Repeat the test? Something else? > Online: [ node0 node1 ] > > Resource Group: apache-group > fs0 (ocf::heartbeat:Filesystem): Started node0 > virtual-ip (ocf::heartbeat:IPaddr2): Started node0 > nfs_server (lsb:nfs-kernel-server): Started node0 > Master/Slave Set: ms-drbd0 > Masters: [ node0 ] > Slaves: [ node1 ] > nfs_client (ocf::heartbeat:Filesystem): Started node0 (unmanaged) > FAILED > > Failed actions: > nfs_client_stop_0 (node=node0, call=21, rc=1, status=complete): unknown > error > node1:~# > > Here is the relevant part of daemon.log http://pastebin.com/L9scU4fy > > Thank you ! > > Andrew Beekhof írta: > > On Sat, Apr 17, 2010 at 12:21 AM, Sandor Feher wrote: > > > Hi, > > First of all my goal is to set up a two-node cluster with pacemaker to > serve our webhosting service. > This config sites on two vmware virtual machines for testing purposes > now. Both of them runs Debian Lenny. > > Here are the basic rules I set up: > > node0 has > > virtual ip > drbd primary filesystem mounted under /mnt > nfs server offers /mnt mount point to node1 > > node1 > > drbd secondary node > nfs_client mounts node0's /mnt dir and it should be rw for both nodes > > If node0 fails then node1 will act as primary drbd node, take over > virtual ip and mount drbd partition under /mnt dir and will not start > nfs_client resource because it makes no sense (nfs_client should be take > down before drbd partition get mounted under /mnt). > If node1 fails the nothing should be happen because nfs_client only run > node which has secondary drbd partition > > So my problems are the following. > > 1. 
If I migrate apache-group resorce to another node then nfs_client > won't release the /mnt mount point (I know according to this config it > should not). > I think I need some clever constraint to achieve this. > > > Perhaps instead of: >colocation co_nfs_client inf: nfs_client ms-drbd0:Slave > try: >colocation co_nfs_client -inf: nfs_client ms-drbd0:Master > > > > > 2. If I shot down node1 (suppose that node0 the master at the moment and > runs apache-group) then nothing happens as expected but if node1 comes > online again the apache-group start to migrate to node1. I don't > understand why > > > because you told it to: >location cli-prefer-apache-group apache-group \ > rule $id="cli-prefer-rule-apache-group" inf: #uname eq node0 > > Change inf to (for example) 1000 > > > > because there is a constraint for this to get > apache-group run on node which primary drbd resource and in this > situation node0 is. > > > crm configure show > > node node0 \ > attributes standby="off" > node node1 \ > attributes standby="off" > primitive drbd0 ocf:heartbeat:drbd \ > params drbd_resource="r0" \ > op monitor interval="59s" role="Master" timeout="30s" \ > op monitor interval="60s" role="Slave" timeout="30s" > primitive fs0 ocf:heartbeat:Filesystem \ > params fstype="ext3" directory="/mnt" device="/dev/drbd0" \ > meta target-role="Started" > primitive nfs_client ocf:heartbeat:Filesystem \ > params fstype="nfs" directory="/mnt/" > device="192.168.1.40:/mnt/" > options="hard,intr,noatime,rw,nolock,tcp,timeo=50" \ > meta target-role="Stopped" > primitive nfs_server lsb:nfs-kernel-server \ > op monitor interval="1min" > primitive virtual-ip ocf:heartbeat:IPaddr2 \ > params ip="192.168.1.40" broadcast="192.168.1.255" nic="eth0" > cidr_netmask="24" \ > op monitor interval="21s" timeout="5s" target-role="Started" > group apache-group fs0 virtual-ip nfs_server \ > meta target-role="Started" > ms ms-drbd0 drbd0 \ > meta clone-max="2" notify="true" globally-unique="false" > 
target-role="Started" > location cli-prefer-apache-group apache-group \ > rule $id="cli-prefer-rule-apache-group" inf: #uname eq node0 > colocation apache-group-on-ms-drbd0 inf: apache-group ms-drbd0:Master > colocation co_nfs_client inf: nfs_client ms-drbd0:Slave > order ms-drbd0-before-apache-group inf: ms-drbd0:promote apache-group:start > order ms-drbd0-before-nfs_client inf: ms-drbd0:promote nfs_client:start > property $id="cib-bootstrap-options" \ > dc-version="1.0.8-2c98138c2f070fcb6ddeab1084154cffbf44ba75" \ > cluster-infrastructure="openais" \ > stonith-enabled="false" \ > no-quorum-policy="ignore" \ > expected-quorum-votes="2" \ > last-lrm-refresh="1271453094" > > node1:~# crm_mon -1 > > Last updated: Fri Apr 16 23:49:30 2010 > Stack: openais > Current DC: node0 - partition with quoru
Re: [Openais] Missing shutdown messages with corosync 1.2.1 and pacemaker
On Mon, Apr 12, 2010 at 3:19 PM, Andreas Mock wrote: > -Ursprüngliche Nachricht- > Von: Andrew Beekhof > Gesendet: 12.04.2010 08:58:44 > An: Andreas Mock > Betreff: Re: [Openais] Missing shutdown messages with corosync 1.2.1 and > pacemaker > > Hi all, > >>You might want to include your corosync config file. > > See attached (for the second try with logging to file) > >>Does the same happen if you configure log-to-file? > > Yes, it's the same. To be sure, you're saying the logs are also missing from /tmp/corosync.log? ___ Openais mailing list Openais@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/openais
Re: [Openais] Failover constraint problem
On Sat, Apr 17, 2010 at 12:21 AM, Sandor Feher wrote: > Hi, > > First of all my goal is to set up a two-node cluster with pacemaker to > serve our webhosting service. > This config sites on two vmware virtual machines for testing purposes > now. Both of them runs Debian Lenny. > > Here are the basic rules I set up: > > node0 has > > virtual ip > drbd primary filesystem mounted under /mnt > nfs server offers /mnt mount point to node1 > > node1 > > drbd secondary node > nfs_client mounts node0's /mnt dir and it should be rw for both nodes > > If node0 fails then node1 will act as primary drbd node, take over > virtual ip and mount drbd partition under /mnt dir and will not start > nfs_client resource because it makes no sense (nfs_client should be take > down before drbd partition get mounted under /mnt). > If node1 fails the nothing should be happen because nfs_client only run > node which has secondary drbd partition > > So my problems are the following. > > 1. If I migrate apache-group resorce to another node then nfs_client > won't release the /mnt mount point (I know according to this config it > should not). > I think I need some clever constraint to achieve this. Perhaps instead of: colocation co_nfs_client inf: nfs_client ms-drbd0:Slave try: colocation co_nfs_client -inf: nfs_client ms-drbd0:Master > 2. If I shot down node1 (suppose that node0 the master at the moment and > runs apache-group) then nothing happens as expected but if node1 comes > online again the apache-group start to migrate to node1. I don't > understand why because you told it to: location cli-prefer-apache-group apache-group \ rule $id="cli-prefer-rule-apache-group" inf: #uname eq node0 Change inf to (for example) 1000 > because there is a constraint for this to get > apache-group run on node which primary drbd resource and in this > situation node0 is. 
> > > crm configure show > > node node0 \ > attributes standby="off" > node node1 \ > attributes standby="off" > primitive drbd0 ocf:heartbeat:drbd \ > params drbd_resource="r0" \ > op monitor interval="59s" role="Master" timeout="30s" \ > op monitor interval="60s" role="Slave" timeout="30s" > primitive fs0 ocf:heartbeat:Filesystem \ > params fstype="ext3" directory="/mnt" device="/dev/drbd0" \ > meta target-role="Started" > primitive nfs_client ocf:heartbeat:Filesystem \ > params fstype="nfs" directory="/mnt/" > device="192.168.1.40:/mnt/" > options="hard,intr,noatime,rw,nolock,tcp,timeo=50" \ > meta target-role="Stopped" > primitive nfs_server lsb:nfs-kernel-server \ > op monitor interval="1min" > primitive virtual-ip ocf:heartbeat:IPaddr2 \ > params ip="192.168.1.40" broadcast="192.168.1.255" nic="eth0" > cidr_netmask="24" \ > op monitor interval="21s" timeout="5s" target-role="Started" > group apache-group fs0 virtual-ip nfs_server \ > meta target-role="Started" > ms ms-drbd0 drbd0 \ > meta clone-max="2" notify="true" globally-unique="false" > target-role="Started" > location cli-prefer-apache-group apache-group \ > rule $id="cli-prefer-rule-apache-group" inf: #uname eq node0 > colocation apache-group-on-ms-drbd0 inf: apache-group ms-drbd0:Master > colocation co_nfs_client inf: nfs_client ms-drbd0:Slave > order ms-drbd0-before-apache-group inf: ms-drbd0:promote apache-group:start > order ms-drbd0-before-nfs_client inf: ms-drbd0:promote nfs_client:start > property $id="cib-bootstrap-options" \ > dc-version="1.0.8-2c98138c2f070fcb6ddeab1084154cffbf44ba75" \ > cluster-infrastructure="openais" \ > stonith-enabled="false" \ > no-quorum-policy="ignore" \ > expected-quorum-votes="2" \ > last-lrm-refresh="1271453094" > > node1:~# crm_mon -1 > > Last updated: Fri Apr 16 23:49:30 2010 > Stack: openais > Current DC: node0 - partition with quorum > Version: 1.0.8-2c98138c2f070fcb6ddeab1084154cffbf44ba75 > 2 Nodes configured, 2 expected votes > 3 Resources configured. 
> > > Online: [ node0 node1 ] > > Resource Group: apache-group > fs0 (ocf::heartbeat:Filesystem): Started node1 > (unmanaged) FAILED > virtual-ip (ocf::heartbeat:IPaddr2): Stopped > nfs_server (lsb:nfs-kernel-server): Stopped > Master/Slave Set: ms-drbd0 > Masters: [ node0 ] > Slaves: [ node1 ] > nfs_client (ocf::heartbeat:Filesystem): Started node1 > (unmanaged) FAILED > > Failed actions: > nfs_client_start_0 (node=node0, call=98, rc=1, status=complete): > unknown error > fs0_stop_0 (node=node1, call=9, rc=-2, status=Timed Out): unknown > exec error > nfs_client_stop_0 (node=node1, call=7, rc=-2, status=Timed Out): > unknown exec error > > > I really appreciate any idea. Thank you in advance. > > Regards, Sandor > ___ > Openais mailing list > Openais@lists.linux-foundation.org > https://lists.linux-foundation.org/mailm