The branch, master has been updated via 4037b6e73a819a8e2463dfe0959b42875e05e106 (commit) via 227fe99f105bdc3a4f1000f238cbe3adeb3f22f0 (commit) via cc30ee2f4f33cb75b2be980c2d4dff6c7c23852f (commit) via a10fc51f4c30e85ada6d4b7347b0f9a8ebc76637 (commit) from 20678e1506db1f96b58c326ee91339e797c07c22 (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master - Log ----------------------------------------------------------------- commit 4037b6e73a819a8e2463dfe0959b42875e05e106 Author: Ronnie Sahlberg <ronniesahlb...@gmail.com> Date: Tue Oct 27 15:45:03 2009 +1100 add a check that winbind can actually talk to the DC during the startup event and refuse to start up if it cannot commit 227fe99f105bdc3a4f1000f238cbe3adeb3f22f0 Author: Ronnie Sahlberg <ronniesahlb...@gmail.com> Date: Tue Oct 27 15:17:45 2009 +1100 temporarily try allowing clients to attach to databases even if the node is banned/stopped or inactive in any other way. commit cc30ee2f4f33cb75b2be980c2d4dff6c7c23852f Author: Ronnie Sahlberg <ronniesahlb...@gmail.com> Date: Tue Oct 27 13:51:45 2009 +1100 don't run the monitor event so frequently after an event has failed. use _exit() instead of exit() when terminating an eventscript. commit a10fc51f4c30e85ada6d4b7347b0f9a8ebc76637 Author: Ronnie Sahlberg <ronniesahlb...@gmail.com> Date: Tue Oct 27 13:18:52 2009 +1100 for debugging add a global variable holding the pid of the main daemon. 
change the tracking of time() in the event loop to only check/warn when called from the main daemon ----------------------------------------------------------------------- Summary of changes: client/ctdb_client.c | 2 ++ config/events.d/50.samba | 4 ++++ include/ctdb_private.h | 5 +++++ lib/events/events_epoll.c | 22 +++++++++++++--------- lib/events/events_select.c | 22 +++++++++++++--------- lib/events/events_standard.c | 22 +++++++++++++--------- server/ctdb_daemon.c | 4 +++- server/ctdb_ltdb_server.c | 25 ++++++++++++++----------- server/ctdb_monitor.c | 4 ++-- server/eventscript.c | 2 +- tools/ctdb.c | 1 - 11 files changed, 70 insertions(+), 43 deletions(-) Changeset truncated at 500 lines: diff --git a/client/ctdb_client.c b/client/ctdb_client.c index e8c543f..d4130cd 100644 --- a/client/ctdb_client.c +++ b/client/ctdb_client.c @@ -30,6 +30,8 @@ #include "../include/ctdb_private.h" #include "lib/util/dlinklist.h" +pid_t ctdbd_pid; + /* allocate a packet for use in client<->daemon communication */ diff --git a/config/events.d/50.samba b/config/events.d/50.samba index 814fb9a..1785f4d 100755 --- a/config/events.d/50.samba +++ b/config/events.d/50.samba @@ -171,6 +171,10 @@ case $cmd in killall -q -9 winbindd } service "$CTDB_SERVICE_WINBIND" start + wbinfo -t || { + echo "Startup failed. wbinfo -t returned error." + exit 1 + } } # start Samba service. 
Start it reniced, as under very heavy load diff --git a/include/ctdb_private.h b/include/ctdb_private.h index ad84628..e73913a 100644 --- a/include/ctdb_private.h +++ b/include/ctdb_private.h @@ -54,6 +54,11 @@ struct takeover_run_reply { }; /* + * pid of the ctdbd daemon + */ +extern pid_t ctdbd_pid; + +/* a tcp connection description */ struct ctdb_tcp_connection { diff --git a/lib/events/events_epoll.c b/lib/events/events_epoll.c index b7172a7..0dfdb79 100644 --- a/lib/events/events_epoll.c +++ b/lib/events/events_epoll.c @@ -29,6 +29,8 @@ #include "lib/events/events_internal.h" #include <sys/epoll.h> +extern pid_t ctdbd_pid; + struct epoll_event_context { /* a pointer back to the generic event_context */ struct event_context *ev; @@ -466,17 +468,19 @@ static int epoll_event_loop_wait(struct event_context *ev) if (epoll_event_loop_once(ev) != 0) { break; } - new_t=time(NULL); - if (t != 0) { - if (t > new_t) { - DEBUG(0,("ERROR Time skipped backward by %d seconds\n", (int)(t-new_t))); - } - /* We assume here that we get at least one event every 5 seconds */ - if (new_t > (t+5)) { - DEBUG(0,("ERROR Time jumped forward by %d seconds\n", (int)(new_t-t))); + if (getpid() == ctdbd_pid) { + new_t=time(NULL); + if (t != 0) { + if (t > new_t) { + DEBUG(0,("ERROR Time skipped backward by %d seconds\n", (int)(t-new_t))); + } + /* We assume here that we get at least one event every 5 seconds */ + if (new_t > (t+5)) { + DEBUG(0,("ERROR Time jumped forward by %d seconds\n", (int)(new_t-t))); + } } + t=new_t; } - t=new_t; } return 0; diff --git a/lib/events/events_select.c b/lib/events/events_select.c index c908b12..404cd8c 100644 --- a/lib/events/events_select.c +++ b/lib/events/events_select.c @@ -31,6 +31,8 @@ #include "lib/events/events.h" #include "lib/events/events_internal.h" +extern pid_t ctdbd_pid; + struct select_event_context { /* a pointer back to the generic event_context */ struct event_context *ev; @@ -281,17 +283,19 @@ static int select_event_loop_wait(struct 
event_context *ev) if (select_event_loop_once(ev) != 0) { break; } - new_t=time(NULL); - if (t != 0) { - if (t > new_t) { - DEBUG(0,("ERROR Time skipped backward by %d seconds\n", (int)(t-new_t))); - } - /* We assume here that we get at least one event every 5 seconds */ - if (new_t > (t+5)) { - DEBUG(0,("ERROR Time jumped forward by %d seconds\n", (int)(new_t-t))); + if (getpid() == ctdbd_pid) { + new_t=time(NULL); + if (t != 0) { + if (t > new_t) { + DEBUG(0,("ERROR Time skipped backward by %d seconds\n", (int)(t-new_t))); + } + /* We assume here that we get at least one event every 5 seconds */ + if (new_t > (t+5)) { + DEBUG(0,("ERROR Time jumped forward by %d seconds\n", (int)(new_t-t))); + } } + t=new_t; } - t=new_t; } return select_ev->exit_code; diff --git a/lib/events/events_standard.c b/lib/events/events_standard.c index 35516ea..9ee2328 100644 --- a/lib/events/events_standard.c +++ b/lib/events/events_standard.c @@ -36,6 +36,8 @@ #include "lib/events/events.h" #include "lib/events/events_internal.h" +extern pid_t ctdbd_pid; + struct std_event_context { /* a pointer back to the generic event_context */ struct event_context *ev; @@ -582,17 +584,19 @@ static int std_event_loop_wait(struct event_context *ev) if (std_event_loop_once(ev) != 0) { break; } - new_t=time(NULL); - if (t != 0) { - if (t > new_t) { - DEBUG(0,("ERROR Time skipped backward by %d seconds\n", (int)(t-new_t))); - } - /* We assume here that we get at least one event every 5 seconds */ - if (new_t > (t+5)) { - DEBUG(0,("ERROR Time jumped forward by %d seconds\n", (int)(new_t-t))); + if (getpid() == ctdbd_pid) { + new_t=time(NULL); + if (t != 0) { + if (t > new_t) { + DEBUG(0,("ERROR Time skipped backward by %d seconds\n", (int)(t-new_t))); + } + /* We assume here that we get at least one event every 5 seconds */ + if (new_t > (t+5)) { + DEBUG(0,("ERROR Time jumped forward by %d seconds\n", (int)(new_t-t))); + } } + t=new_t; } - t=new_t; } return std_ev->exit_code; diff --git 
a/server/ctdb_daemon.c b/server/ctdb_daemon.c index a8dc651..54a47c1 100644 --- a/server/ctdb_daemon.c +++ b/server/ctdb_daemon.c @@ -36,7 +36,6 @@ static void print_exit_message(void) DEBUG(DEBUG_NOTICE,("CTDB daemon shutting down\n")); } - /* called when the "startup" event script has finished */ static void ctdb_start_transport(struct ctdb_context *ctdb) { @@ -690,6 +689,9 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork) } block_signal(SIGPIPE); + ctdbd_pid = getpid(); + DEBUG(DEBUG_ERR, ("Starting CTDBD as pid : %u\n", ctdbd_pid)); + if (ctdb->do_setsched) { /* try to set us up as realtime */ ctdb_set_scheduler(ctdb); diff --git a/server/ctdb_ltdb_server.c b/server/ctdb_ltdb_server.c index ec29076..54b4f5e 100644 --- a/server/ctdb_ltdb_server.c +++ b/server/ctdb_ltdb_server.c @@ -316,7 +316,9 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata, { const char *db_name = (const char *)indata.dptr; struct ctdb_db_context *db; +#if 0 struct ctdb_node *node = ctdb->nodes[ctdb->pnn]; +#endif /* the client can optionally pass additional tdb flags, but we only allow a subset of those on the database in ctdb. 
Note @@ -324,25 +326,26 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata, srvid to the attach control */ tdb_flags &= TDB_NOSYNC; - /* If the node is inactive it is not part of the cluster - and we should not allow clients to attach to any - databases - */ - if (node->flags & NODE_FLAGS_INACTIVE) { - DEBUG(DEBUG_ERR,("DB Attach to database %s refused since node is inactive (disconnected or banned)\n", db_name)); - return -1; - } - - /* see if we already have this name */ db = ctdb_db_handle(ctdb, db_name); - if (db) { + if (db != NULL) { outdata->dptr = (uint8_t *)&db->db_id; outdata->dsize = sizeof(db->db_id); tdb_add_flags(db->ltdb->tdb, tdb_flags); return 0; } +#if 0 + /* If the node is inactive it is not part of the cluster + and we should not allow clients to attach to any new + databases + */ + if (node->flags & NODE_FLAGS_INACTIVE) { + DEBUG(DEBUG_ERR,("DB Attach to database %s refused since node is inactive (disconnected or banned)\n", db_name)); + return -1; + } +#endif + if (ctdb_local_attach(ctdb, db_name, persistent) != 0) { return -1; } diff --git a/server/ctdb_monitor.c b/server/ctdb_monitor.c index 8997535..f2d6d18 100644 --- a/server/ctdb_monitor.c +++ b/server/ctdb_monitor.c @@ -126,7 +126,7 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p) if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) { DEBUG(DEBUG_NOTICE,("monitor event failed - disabling node\n")); node->flags |= NODE_FLAGS_UNHEALTHY; - ctdb->monitor->next_interval = 1; + ctdb->monitor->next_interval = 5; if (ctdb->tunable.disable_when_unhealthy != 0) { DEBUG(DEBUG_INFO, ("DISABLING node since it became unhealthy\n")); node->flags |= NODE_FLAGS_DISABLED; @@ -144,7 +144,7 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p) } else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) { DEBUG(DEBUG_NOTICE,("monitor event OK - node re-enabled\n")); node->flags &= ~NODE_FLAGS_UNHEALTHY; - 
ctdb->monitor->next_interval = 1; + ctdb->monitor->next_interval = 5; ctdb_run_notification_script(ctdb, "healthy"); diff --git a/server/eventscript.c b/server/eventscript.c index 3b86615..b4f5e2a 100644 --- a/server/eventscript.c +++ b/server/eventscript.c @@ -53,7 +53,7 @@ static void sigterm(int sig) /* all the child processes will be running in the same process group */ kill(-getpgrp(), SIGKILL); - exit(1); + _exit(1); } struct ctdb_event_script_state { diff --git a/tools/ctdb.c b/tools/ctdb.c index 2f78ebe..bad4450 100644 --- a/tools/ctdb.c +++ b/tools/ctdb.c @@ -31,7 +31,6 @@ #include "../common/rb_tree.h" #include "db_wrap.h" - #define ERR_TIMEOUT 20 /* timed out trying to reach node */ #define ERR_NONODE 21 /* node does not exist */ #define ERR_DISNODE 22 /* node is disconnected */ -- CTDB repository