The branch, master has been updated
       via  c386f2c62f06f1c60047b7d4b1ec7a9eec11873c (commit)
       via  80b8889267339b870868841ff077e850bc5b52e2 (commit)
       via  93df096773c89f21f77b3bcf9aa90bf28881b852 (commit)
       via  942f44123350d4d0c4ad7f3fcd5ff2d0d175739b (commit)
      from  1261f3d9702800a4e59550c881350daf479f00ef (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit c386f2c62f06f1c60047b7d4b1ec7a9eec11873c
Author: Stefan Metzmacher <me...@samba.org>
Date:   Tue Aug 31 09:28:34 2010 +0200

    server/banning: also release all ips if we're banning ourself
    
    metze

commit 80b8889267339b870868841ff077e850bc5b52e2
Author: Stefan Metzmacher <me...@samba.org>
Date:   Mon Aug 30 18:25:28 2010 +0200

    server/recoverd: if we can't get the recovery lock, ban ourself
    
    metze

commit 93df096773c89f21f77b3bcf9aa90bf28881b852
Author: Stefan Metzmacher <me...@samba.org>
Date:   Tue Aug 31 08:42:32 2010 +0200

    server/recoverd: do takeover_run after verifying the reclock file
    
    metze

commit 942f44123350d4d0c4ad7f3fcd5ff2d0d175739b
Author: Stefan Metzmacher <me...@samba.org>
Date:   Tue Aug 24 09:22:49 2010 +0200

    server/monitor: ask for a takeoverrun after propagating our new flags
    
    metze

-----------------------------------------------------------------------

Summary of changes:
 include/ctdb_private.h |    1 +
 server/ctdb_banning.c  |   30 +++++++++++++++++++++++++++++-
 server/ctdb_monitor.c  |   48 +++++++++++++++---------------------------------
 server/ctdb_recoverd.c |   15 +++++++++------
 4 files changed, 54 insertions(+), 40 deletions(-)


Changeset truncated at 500 lines:

diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index b707afd..89b8f08 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -1291,6 +1291,7 @@ int ctdb_vacuum_init(struct ctdb_db_context *ctdb_db);
 int32_t ctdb_control_enable_script(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_disable_script(struct ctdb_context *ctdb, TDB_DATA indata);
 
+int32_t ctdb_local_node_got_banned(struct ctdb_context *ctdb);
 int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_get_ban_state(struct ctdb_context *ctdb, TDB_DATA *outdata);
 int32_t ctdb_control_set_db_priority(struct ctdb_context *ctdb, TDB_DATA indata);
diff --git a/server/ctdb_banning.c b/server/ctdb_banning.c
index 3d5f216..5684907 100644
--- a/server/ctdb_banning.c
+++ b/server/ctdb_banning.c
@@ -42,6 +42,31 @@ ctdb_ban_node_event(struct event_context *ev, struct timed_event *te,
        }
 }
 
+int32_t ctdb_local_node_got_banned(struct ctdb_context *ctdb)
+{
+       uint32_t i;
+
+       /* make sure we are frozen */
+       DEBUG(DEBUG_NOTICE,("This node has been banned - forcing freeze and recovery\n"));
+
+       /* Reset the generation id to 1 to make us ignore any
+          REQ/REPLY CALL/DMASTER someone sends to us.
+          We are now banned so we shouldnt service database calls
+          anymore.
+       */
+       ctdb->vnn_map->generation = INVALID_GENERATION;
+
+       for (i=1; i<=NUM_DB_PRIORITIES; i++) {
+               if (ctdb_start_freeze(ctdb, i) != 0) {
+                       DEBUG(DEBUG_ERR,(__location__ " Failed to freeze db priority %u\n", i));
+               }
+       }
+       ctdb_release_all_ips(ctdb);
+       ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+
+       return 0;
+}
+
 int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata)
 {
        struct ctdb_ban_time *bantime = (struct ctdb_ban_time *)indata.dptr;
@@ -96,7 +121,10 @@ int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata)
        ctdb->nodes[bantime->pnn]->flags |= NODE_FLAGS_BANNED;
 
        event_add_timed(ctdb->ev, ctdb->banning_ctx, timeval_current_ofs(bantime->time,0), ctdb_ban_node_event, ctdb);
-       
+       if (bantime->pnn == ctdb->pnn) {
+               return ctdb_local_node_got_banned(ctdb);
+       }
+
        return 0;
 }
 
diff --git a/server/ctdb_monitor.c b/server/ctdb_monitor.c
index 7f5da5c..dff6f42 100644
--- a/server/ctdb_monitor.c
+++ b/server/ctdb_monitor.c
@@ -114,6 +114,7 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
        int ret;
        TDB_DATA rddata;
        struct takeover_run_reply rd;
+       const char *state_str = NULL;
 
        c.pnn = ctdb->pnn;
        c.old_flags = node->flags;
@@ -141,28 +142,12 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
                ctdb->monitor->next_interval = 5;
 
                ctdb_run_notification_script(ctdb, "unhealthy");
-
-               /* ask the recmaster to reallocate all addresses */
-               DEBUG(DEBUG_ERR,("Node became UNHEALTHY. Ask recovery master %u to perform ip reallocation\n", ctdb->recovery_master));
-               ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
-               if (ret != 0) {
-                       DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
-               }
-
        } else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) {
                DEBUG(DEBUG_NOTICE,("monitor event OK - node re-enabled\n"));
                node->flags &= ~NODE_FLAGS_UNHEALTHY;
                ctdb->monitor->next_interval = 5;
 
                ctdb_run_notification_script(ctdb, "healthy");
-
-               /* ask the recmaster to reallocate all addresses */
-               DEBUG(DEBUG_ERR,("Node became HEALTHY. Ask recovery master %u to perform ip reallocation\n", ctdb->recovery_master));
-               ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
-               if (ret != 0) {
-                       DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
-               }
-
        }
 
 after_change_status:
@@ -190,6 +175,19 @@ after_change_status:
        ctdb_daemon_send_message(ctdb, ctdb->pnn,
                                 CTDB_SRVID_PUSH_NODE_FLAGS, data);
 
+       if (c.new_flags & NODE_FLAGS_UNHEALTHY) {
+               state_str = "UNHEALTHY";
+       } else {
+               state_str = "HEALTHY";
+       }
+
+       /* ask the recmaster to reallocate all addresses */
+       DEBUG(DEBUG_ERR,("Node became %s. Ask recovery master %u to perform ip reallocation\n",
+                        state_str, ctdb->recovery_master));
+       ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
+       if (ret != 0) {
+               DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
+       }
 }
 
 
@@ -433,7 +431,6 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
        struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)indata.dptr;
        struct ctdb_node *node;
        uint32_t old_flags;
-       int i;
 
        if (c->pnn >= ctdb->num_nodes) {
                DEBUG(DEBUG_ERR,(__location__ " Node %d is invalid, num_nodes :%d\n", c->pnn, ctdb->num_nodes));
@@ -483,22 +480,7 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
 
        /* if we have become banned, we should go into recovery mode */
        if ((node->flags & NODE_FLAGS_BANNED) && !(c->old_flags & NODE_FLAGS_BANNED) && (node->pnn == ctdb->pnn)) {
-               /* make sure we are frozen */
-               DEBUG(DEBUG_NOTICE,("This node has been banned - forcing freeze and recovery\n"));
-               /* Reset the generation id to 1 to make us ignore any
-                  REQ/REPLY CALL/DMASTER someone sends to us.
-                  We are now banned so we shouldnt service database calls
-                  anymore.
-               */
-               ctdb->vnn_map->generation = INVALID_GENERATION;
-
-               for (i=1; i<=NUM_DB_PRIORITIES; i++) {
-                       if (ctdb_start_freeze(ctdb, i) != 0) {
-                               DEBUG(DEBUG_ERR,(__location__ " Failed to freeze db priority %u\n", i));
-                       }
-               }
-               ctdb_release_all_ips(ctdb);
-               ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+               return ctdb_local_node_got_banned(ctdb);
        }
        
        return 0;
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 437e4cb..30c34b3 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -1382,8 +1382,10 @@ static int do_recovery(struct ctdb_recoverd *rec,
                DEBUG(DEBUG_ERR,("Taking out recovery lock from recovery daemon\n"));
                start_time = timeval_current();
                if (!ctdb_recovery_lock(ctdb, true)) {
-                       ctdb_set_culprit(rec, pnn);
-                       DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery\n"));
+                       DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery "
+                                        "and ban ourself for %u seconds\n",
+                                        ctdb->tunable.recovery_ban_period));
+                       ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period);
                        return -1;
                }
                ctdb_ctrl_report_recd_lock_latency(ctdb, CONTROL_TIMEOUT(), timeval_elapsed(&start_time));
@@ -3009,10 +3011,6 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
                        rec->reallocate_callers = NULL;
                }
        }
-       /* if there are takeovers requested, perform it and notify the waiters */
-       if (rec->reallocate_callers) {
-               process_ipreallocate_requests(ctdb, rec);
-       }
 
        if (rec->recmaster == (uint32_t)-1) {
                DEBUG(DEBUG_NOTICE,(__location__ " Initial recovery master set - forcing election\n"));
@@ -3199,6 +3197,11 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
                }
        }
 
+       /* if there are takeovers requested, perform it and notify the waiters */
+       if (rec->reallocate_callers) {
+               process_ipreallocate_requests(ctdb, rec);
+       }
+
        /* get the nodemap for all active remote nodes
         */
        remote_nodemaps = talloc_array(mem_ctx, struct ctdb_node_map *, nodemap->num);


-- 
CTDB repository

Reply via email to