The branch, 2.5 has been updated via 70c7ef023730d8344ca4afde2c94634dd541101f (commit) via b1758c6a22c16e7798bbb147d5bf8e04b2bf0c55 (commit) via be0e7aba3a569adedc87be74270b079738ad7f13 (commit) via 8518cbd6665e6d1b3402876346454930de2a5ed3 (commit) from 1d4bd9eb387775dca674112913c1bf23be1c3690 (commit)
http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=2.5 - Log ----------------------------------------------------------------- commit 70c7ef023730d8344ca4afde2c94634dd541101f Author: Amitay Isaacs <ami...@gmail.com> Date: Thu Sep 25 17:55:15 2014 +1000 daemon: Fix the usage for lock helper Signed-off-by: Amitay Isaacs <ami...@gmail.com> Reviewed-by: Martin Schwenke <mar...@meltin.net> Autobuild-User(master): Martin Schwenke <mart...@samba.org> Autobuild-Date(master): Thu Sep 25 17:16:31 CEST 2014 on sn-devel-104 (Imported from commit 0f92de8463b71a2d7e9acdd27454be7859713436) commit b1758c6a22c16e7798bbb147d5bf8e04b2bf0c55 Author: Amitay Isaacs <ami...@gmail.com> Date: Thu Sep 25 17:17:04 2014 +1000 recoverd: If obtaining recovery lock fails, try again When ctdb daemon starts up, it considers itself the recovery master and tries to do first recovery. However, it's possible that there is already a recovery master and the current node has not yet heard from it. So do not ban ourselves immediately if ctdb_recovery_lock() fails when doing first recovery. Signed-off-by: Amitay Isaacs <ami...@gmail.com> Reviewed-by: Martin Schwenke <mar...@meltin.net> (Imported from commit 57310f80c9b8146a0978d912f73b0a64fde7697e) commit be0e7aba3a569adedc87be74270b079738ad7f13 Author: Amitay Isaacs <ami...@gmail.com> Date: Thu Sep 25 12:46:22 2014 +1000 scripts: Fix the regular expression for parsing /proc/locks The major and minor device numbers are hexadecimal not decimal. 
Signed-off-by: Amitay Isaacs <ami...@gmail.com> Reviewed-by: Martin Schwenke <mar...@meltin.net> Autobuild-User(master): Martin Schwenke <mart...@samba.org> Autobuild-Date(master): Thu Sep 25 07:19:59 CEST 2014 on sn-devel-104 (Imported from commit f1e281cd47d9ebd79e09294606b8fa411ec0fbb4) commit 8518cbd6665e6d1b3402876346454930de2a5ed3 Author: Amitay Isaacs <ami...@gmail.com> Date: Thu Sep 25 12:44:59 2014 +1000 locking: Reset ttimer before doing an early return When timer expires, timeout handler routine sets lock_ctx->ttimer to a newly created timer event. However, when a node is INACTIVE, timeout handler returns early with lock_ctx->ttimer set to the previous timer event. This timer event gets freed when the callback returns and lock_ctx->ttimer remains set to already freed timer event. Signed-off-by: Amitay Isaacs <ami...@gmail.com> Reviewed-by: Martin Schwenke <mar...@meltin.net> (Imported from commit c64369cba2e5a975d87d518737abbf04c9871a26) ----------------------------------------------------------------------- Summary of changes: config/debug_locks.sh | 2 +- server/ctdb_lock.c | 1 + server/ctdb_lock_helper.c | 4 ++-- server/ctdb_recoverd.c | 18 ++++++++++++++++++ 4 files changed, 22 insertions(+), 3 deletions(-) Changeset truncated at 500 lines: diff --git a/config/debug_locks.sh b/config/debug_locks.sh index 54b52ed..33bf3e6 100755 --- a/config/debug_locks.sh +++ b/config/debug_locks.sh @@ -28,7 +28,7 @@ loadconfig ctdb sed_cmd=$( ls -li "$CTDB_DBDIR"/*.tdb.* "$CTDB_DBDIR_PERSISTENT"/*.tdb.* | sed -e "s#${CTDB_DBDIR}/\(.*\)#\1#" \ -e "s#${CTDB_DBDIR_PERSISTENT}/\(.*\)#\1#" | - awk '{printf "s#[0-9]*:[0-9]*:%s #%s #\n", $1, $10}' ) + awk '{printf "s#[0-9a-f]*:[0-9a-f]*:%s #%s #\n", $1, $10}' ) # Parse /proc/locks and extract following information # pid process_name tdb_name offsets [W] diff --git a/server/ctdb_lock.c b/server/ctdb_lock.c index a866835..8292599 100644 --- a/server/ctdb_lock.c +++ b/server/ctdb_lock.c @@ -492,6 +492,7 @@ static void 
ctdb_lock_timeout_handler(struct tevent_context *ev, /* If a node stopped/banned, don't spam the logs */ if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_INACTIVE) { + lock_ctx->ttimer = NULL; return; } if (lock_ctx->ctdb_db) { diff --git a/server/ctdb_lock_helper.c b/server/ctdb_lock_helper.c index 261a048..f164769 100644 --- a/server/ctdb_lock_helper.c +++ b/server/ctdb_lock_helper.c @@ -36,9 +36,9 @@ static void send_result(int fd, char result) static void usage(void) { fprintf(stderr, "\n"); - fprintf(stderr, "Usage: %s <ctdbd-pid> <output-fd> RECORD <db-path> <db-key>\n", + fprintf(stderr, "Usage: %s <log-fd> <ctdbd-pid> <output-fd> RECORD <db-path> <db-key>\n", progname); - fprintf(stderr, " %s <ctdbd-pid> <output-fd> DB <db1-path> [<db2-path> ...]\n", + fprintf(stderr, " %s <log-fd> <ctdbd-pid> <output-fd> DB <db1-path> [<db2-path> ...]\n", progname); } diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c index 9b553b7..d3c06b4 100644 --- a/server/ctdb_recoverd.c +++ b/server/ctdb_recoverd.c @@ -1815,6 +1815,16 @@ static int do_recovery(struct ctdb_recoverd *rec, DEBUG(DEBUG_ERR,("Taking out recovery lock from recovery daemon\n")); start_time = timeval_current(); if (!ctdb_recovery_lock(ctdb, true)) { + if (ctdb->runstate == CTDB_RUNSTATE_FIRST_RECOVERY) { + /* If ctdb is trying first recovery, it's + * possible that current node does not know yet + * who the recmaster is. 
+ */ + DEBUG(DEBUG_ERR, ("Unable to get recovery lock" + " - retrying recovery\n")); + return -1; + } + DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery " "and ban ourself for %u seconds\n", ctdb->tunable.recovery_ban_period)); @@ -3593,6 +3603,14 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, return; } + /* get runstate */ + ret = ctdb_ctrl_get_runstate(ctdb, CONTROL_TIMEOUT(), + CTDB_CURRENT_NODE, &ctdb->runstate); + if (ret != 0) { + DEBUG(DEBUG_ERR, ("Failed to get runstate - retrying\n")); + return; + } + /* get the current recovery lock file from the server */ if (update_recovery_lock_file(ctdb) != 0) { DEBUG(DEBUG_ERR,("Failed to update the recovery lock file\n")); -- CTDB repository