The branch, 1.0.69 has been updated via 87e674f93728fb20209b6473f07ca323530ddbdc (commit) via 478f43bc3c970edc7c8db3e34095774261a48056 (commit) from a63c79318678abe99d2a36fe4465e63eafc008b7 (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=1.0.69 - Log ----------------------------------------------------------------- commit 87e674f93728fb20209b6473f07ca323530ddbdc Author: root <r...@rcn1.vsofs1.com> Date: Fri May 1 01:24:27 2009 +1000 new version 1.0.69-5 commit 478f43bc3c970edc7c8db3e34095774261a48056 Author: root <r...@rcn1.vsofs1.com> Date: Fri May 1 01:18:27 2009 +1000 Add a new variable VerifyRecoveryLock which can be used to disable the test that the recovery daemon holds the lock properly when performing a recovery ----------------------------------------------------------------------- Summary of changes: include/ctdb_private.h | 1 + packaging/RPM/ctdb.spec | 8 +++++++- server/ctdb_recover.c | 15 +++++++++++---- server/ctdb_tunables.c | 1 + 4 files changed, 20 insertions(+), 5 deletions(-) Changeset truncated at 500 lines: diff --git a/include/ctdb_private.h b/include/ctdb_private.h index df194bc..5121cd9 100644 --- a/include/ctdb_private.h +++ b/include/ctdb_private.h @@ -118,6 +118,7 @@ struct ctdb_tunable { uint32_t recd_ping_failcount; uint32_t log_latency_ms; uint32_t recovery_drop_all_ips; + uint32_t verify_recovery_lock; }; /* diff --git a/packaging/RPM/ctdb.spec b/packaging/RPM/ctdb.spec index 01d09e0..432a6aa 100644 --- a/packaging/RPM/ctdb.spec +++ b/packaging/RPM/ctdb.spec @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team <sa...@samba.org> Name: ctdb Version: 1.0 -Release: 69_4 +Release: 69_5 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons @@ -121,6 +121,12 @@ fi %{_includedir}/ctdb_private.h %changelog +* Fri May 1 2009 : Version 1.0.69-5 + - Add a new variable VerifyRecoveryLock. When set to 0 this will skip + the test inside the main where it verifies that the recovery master does + hold the lock to the reclock file while performing a recovery. + - Change the timeout for waiting for a reclock child process to terminate to + 15 seconds and increase the logging of this potentially fatal condition. 
* Sun Apr 26 2009 : Version 1.0.69_4 - Add TDB_NO_NESTING to the tdb layer to prevent transaction nesting. - Make sure that when we start a recovery transaction that this is not a diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c index 153f698..c2807b4 100644 --- a/server/ctdb_recover.c +++ b/server/ctdb_recover.c @@ -505,7 +505,7 @@ static void ctdb_set_recmode_timeout(struct event_context *ev, struct timed_even caused by the cluster filesystem being very slow to arbitrate locks immediately after a node failure. */ - DEBUG(DEBUG_NOTICE,(__location__ " set_recmode timeout - allowing recmode set\n")); + DEBUG(DEBUG_ERR,(__location__ " set_recmode child process hung/timedout CFS slow to grant locks? (allowing recmode set anyway)\n")); state->ctdb->recovery_mode = state->recmode; ctdb_request_control_reply(state->ctdb, state->c, NULL, 0, NULL); talloc_free(state); @@ -628,11 +628,17 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb, state = talloc(ctdb, struct ctdb_set_recmode_state); CTDB_NO_MEMORY(ctdb, state); + + if (ctdb->tunable.verify_recovery_lock == 0) { + /* dont need to verify the reclock file */ + ctdb->recovery_mode = recmode; + return 0; + } + /* For the rest of what needs to be done, we need to do this in a child process since 1, the call to ctdb_recovery_lock() can block if the cluster filesystem is in the process of recovery. - 2, running of the script may take a while. 
*/ ret = pipe(state->fd); if (ret != 0) { @@ -653,7 +659,7 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb, char cc = 0; close(state->fd[0]); - /* we should not be able to get the lock on the nodes list, + /* we should not be able to get the lock on the reclock file, as it should be held by the recovery master */ if (ctdb_recovery_lock(ctdb, false)) { @@ -665,6 +671,7 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb, /* make sure we die when our parent dies */ while (kill(parent, 0) == 0 || errno != ESRCH) { sleep(5); + write(state->fd[1], &cc, 1); } _exit(0); } @@ -672,7 +679,7 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb, talloc_set_destructor(state, set_recmode_destructor); - state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(3, 0), + state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(15, 0), ctdb_set_recmode_timeout, state); state->fde = event_add_fd(ctdb->ev, state, state->fd[0], diff --git a/server/ctdb_tunables.c b/server/ctdb_tunables.c index fe6a4b4..821236f 100644 --- a/server/ctdb_tunables.c +++ b/server/ctdb_tunables.c @@ -54,6 +54,7 @@ static const struct { { "RecdFailCount", 3, offsetof(struct ctdb_tunable, recd_ping_failcount) }, { "LogLatencyMs", 0, offsetof(struct ctdb_tunable, log_latency_ms) }, { "RecoveryDropAllIPs", 60, offsetof(struct ctdb_tunable, recovery_drop_all_ips) }, + { "VerifyRecoveryLock", 1, offsetof(struct ctdb_tunable, verify_recovery_lock) }, }; /* -- CTDB repository