The branch, 1.0.112, has been updated via b23c575ac94511cc03ee366c73e4fbdf12afa9c9 (commit) via dad19c940d91eae4625f5489f166e3b5e1e3606c (commit) via 0a87e27e85d4075348fc888fc4f9f5d4ef853fb1 (commit) via 0e714cb24b80f0c1b36124cf9a53c9f3796c1965 (commit) from 906e892e1d0c646e01bceddf42ca5df005b42f20 (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=1.0.112 - Log ----------------------------------------------------------------- commit b23c575ac94511cc03ee366c73e4fbdf12afa9c9 Author: Ronnie Sahlberg <ronniesahlb...@gmail.com> Date: Wed Apr 28 15:47:19 2010 +1000 Don't check ip assignment across the cluster while ip-verification checks are disabled commit dad19c940d91eae4625f5489f166e3b5e1e3606c Author: Ronnie Sahlberg <ronniesahlb...@gmail.com> Date: Wed Apr 28 15:43:11 2010 +1000 The recent change to the recovery daemon to keep track of and verify that all nodes agree on the most recent ip address assignments broke "ctdb moveip ..." since that call would never trigger a full takeover run and thus would immediately trigger an inconsistency. Add a new message to the recovery daemon where we can tell the recovery daemon to update its assignments. BZ62782 commit 0a87e27e85d4075348fc888fc4f9f5d4ef853fb1 Author: Ronnie Sahlberg <ronniesahlb...@gmail.com> Date: Wed Apr 28 14:47:37 2010 +1000 Make create_merged_ip_list() a static function since it is not called from outside of ctdb_takeover.c commit 0e714cb24b80f0c1b36124cf9a53c9f3796c1965 Author: Ronnie Sahlberg <ronniesahlb...@gmail.com> Date: Wed Apr 28 14:44:53 2010 +1000 In the log message when we have found an inconsistent ip address allocation, add extra log information about what the inconsistency is. 
----------------------------------------------------------------------- Summary of changes: include/ctdb.h | 6 ++++++ include/ctdb_private.h | 2 ++ server/ctdb_recoverd.c | 34 +++++++++++++++++++++++++++++++--- server/ctdb_takeover.c | 25 +++++++++++++++++++++++-- tools/ctdb.c | 9 +++++++++ 5 files changed, 71 insertions(+), 5 deletions(-) Changeset truncated at 500 lines: diff --git a/include/ctdb.h b/include/ctdb.h index 3633751..c380c3d 100644 --- a/include/ctdb.h +++ b/include/ctdb.h @@ -75,6 +75,12 @@ struct ctdb_call_info { */ #define CTDB_SRVID_SET_NODE_FLAGS 0xF400000000000000LL +/* + a message ID to ask the recovery daemon to update the expected node + assignment for a public ip + */ +#define CTDB_SRVID_RECD_UPDATE_IP 0xF500000000000000LL + /* a message to tell the recovery daemon to fetch a set of records */ diff --git a/include/ctdb_private.h b/include/ctdb_private.h index 06985a3..6c70623 100644 --- a/include/ctdb_private.h +++ b/include/ctdb_private.h @@ -1566,5 +1566,7 @@ int ctdb_recheck_persistent_health(struct ctdb_context *ctdb); int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips); +int update_ip_assignment_tree(struct ctdb_context *ctdb, + struct ctdb_public_ip *ip); #endif diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c index 6ceb95e..dd678cf 100644 --- a/server/ctdb_recoverd.c +++ b/server/ctdb_recoverd.c @@ -1814,6 +1814,29 @@ static void reenable_ip_check(struct event_context *ev, struct timed_event *te, rec->ip_check_disable_ctx = NULL; } + +static void recd_update_ip_handler(struct ctdb_context *ctdb, uint64_t srvid, + TDB_DATA data, void *private_data) +{ + struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd); + struct ctdb_public_ip *ip; + + if (rec->recmaster != rec->ctdb->pnn) { + DEBUG(DEBUG_INFO,("Not recmaster, ignore update ip message\n")); + return; + } + + if (data.dsize != sizeof(struct ctdb_public_ip)) { + DEBUG(DEBUG_ERR,(__location__ " Incorrect 
size of recd update ip message. Was %zd but expected %zd bytes\n", data.dsize, sizeof(struct ctdb_public_ip))); + return; + } + + ip = (struct ctdb_public_ip *)data.dptr; + + update_ip_assignment_tree(rec->ctdb, ip); +} + + static void disable_ip_check_handler(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data, void *private_data) { @@ -2807,6 +2830,9 @@ static void monitor_cluster(struct ctdb_context *ctdb) /* register a message port for disabling the ip check for a short while */ ctdb_set_message_handler(ctdb, CTDB_SRVID_DISABLE_IP_CHECK, disable_ip_check_handler, rec); + /* register a message port for updating the recovery daemons node assignment for an ip */ + ctdb_set_message_handler(ctdb, CTDB_SRVID_RECD_UPDATE_IP, recd_update_ip_handler, rec); + again: if (mem_ctx) { talloc_free(mem_ctx); @@ -3098,9 +3124,11 @@ again: goto again; } - if (verify_remote_ip_allocation(ctdb, ctdb->nodes[j]->public_ips)) { - DEBUG(DEBUG_ERR,("Node %d has inconsistent public ip allocation and needs update.\n", ctdb->nodes[j]->pnn)); - rec->need_takeover_run = true; + if (rec->ip_check_disable_ctx == NULL) { + if (verify_remote_ip_allocation(ctdb, ctdb->nodes[j]->public_ips)) { + DEBUG(DEBUG_ERR,("Node %d has inconsistent public ip allocation and needs update.\n", ctdb->nodes[j]->pnn)); + rec->need_takeover_run = true; + } } } diff --git a/server/ctdb_takeover.c b/server/ctdb_takeover.c index acc9ce3..0288dd2 100644 --- a/server/ctdb_takeover.c +++ b/server/ctdb_takeover.c @@ -685,7 +685,7 @@ void getips_count_callback(void *param, void *data) *ip_list = new_ip; } -struct ctdb_public_ip_list * +static struct ctdb_public_ip_list * create_merged_ip_list(struct ctdb_context *ctdb) { int i, j; @@ -2185,10 +2185,31 @@ int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_publi } if (tmp_ip->pnn != ips->ips[i].pnn) { - DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation.\n")); + DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. 
Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn)); return -1; } } return 0; } + +int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip) +{ + struct ctdb_public_ip_list *tmp_ip; + + if (ctdb->ip_tree == NULL) { + DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n")); + return -1; + } + + tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr)); + if (tmp_ip == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr))); + return -1; + } + + DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn)); + tmp_ip->pnn = ip->pnn; + + return 0; +} diff --git a/tools/ctdb.c b/tools/ctdb.c index b2284b2..f91c77d 100644 --- a/tools/ctdb.c +++ b/tools/ctdb.c @@ -1032,6 +1032,15 @@ static int move_ip(struct ctdb_context *ctdb, ctdb_sock_addr *addr, uint32_t pnn return -1; } + /* update the recovery daemon so it now knows to expect the new + node assignment for this ip. + */ + ret = ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_RECD_UPDATE_IP, data); + if (ret != 0) { + DEBUG(DEBUG_ERR,("Failed to send message to update the ip on the recovery master.\n")); + return -1; + } + talloc_free(tmp_ctx); return 0; } -- CTDB repository