The branch, master has been updated via cb2c05d5d3f8908eecdad1ae6a1dc8efa1ffcb1e (commit) via 9e08ab6c9f54f5a2c5790927e9aff107ca85a2cc (commit) via 0f1883c69c689b28b0c04148774840b2c4081df6 (commit) via 8556e9dc897c6b9b9be0b52f391effb1f72fbd80 (commit) via e513277fb09b951427be8351d04c877e0a15359d (commit) via 7e587acaf8006254e89ff9b4bf48454821c85863 (commit) via 34b952e4adc53ee82345275a0e28231fa1b2533e (commit) via 50f1255ea9ed15bb8fa11cf838b29afa77e857fd (commit) via 40c7a536c6b428caef7904a1de860d82a70748af (commit) via 56d9c8b222436814fa39bc583318e6fd8e6c74c3 (commit) via 9395a05de669c69396e701fb36409ec49d3ebef6 (commit) via 8e894d8baf20a455b50c5c1b1ac0540d9e766c5d (commit) via a63825e32658b36e0964584758b9a276c18056b8 (commit) via 4063aed8c6babf02726a1663375ea5d32c423e8c (commit) via 79ca87e53dc4c1c73c511680d28db644140a326c (commit) via 7dad1c34f94a433bbb5784cb7156b84bd2e8cd1b (commit) via 772052e071718f20a19d24d5e06a5a2ef87549f2 (commit) via 42ceac4d7f31470e9d626a1709de79658aebde7e (commit) via cbf79b2158ab21a58aef967e89f0bd60890a7972 (commit) via 0a4e667f42c6fb23be13651f7b0d0a545a49900b (commit) via eff3f326f8ce6070c9f3c430cd14d1b71a8db220 (commit) via 3fad7d67f2c66ac3a65cfd821fd6db6342f4a3f0 (commit) via 876d3aca18c27c2239116c8feb6582b3a68c6571 (commit) via 539bbdd9b0d0346b42e66ef2fcfb16f39bbe098b (commit) via 171d1d71ef9f2373620bd7da3adaecb405338603 (commit) from badf34692449bf658cef488c0da6c3eb90187555 (commit)
http://gitweb.samba.org/?p=tridge/ctdb.git;a=shortlog;h=master - Log ----------------------------------------------------------------- commit cb2c05d5d3f8908eecdad1ae6a1dc8efa1ffcb1e Merge: 4063aed8c6babf02726a1663375ea5d32c423e8c 9e08ab6c9f54f5a2c5790927e9aff107ca85a2cc Author: Andrew Tridgell <[EMAIL PROTECTED]> Date: Thu May 8 16:58:34 2008 +1000 Merge branch 'master' of git://git.samba.org/sahlberg/ctdb commit 9e08ab6c9f54f5a2c5790927e9aff107ca85a2cc Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Wed May 7 11:31:37 2008 +1000 update to version .35 commit 0f1883c69c689b28b0c04148774840b2c4081df6 Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Tue May 6 15:42:59 2008 +1000 Expand the client async framework so that it can take a callback function. This allows us to use the async framework also for controls that return outdata. Add a "capabilities" field to the ctdb_node structure. This field is only initialized and kept valid inside the recovery daemon context and not inside the main ctdb daemon. change the GET_CAPABILITIES control to return the capabilities in outdata instead of in the res return variable. When performing a recovery inside the recovery daemon, read the capabilities from all connected nodes and update the ctdb->nodes list of nodes. when building the new vnnmap after the database rebuild in recovery, do not include any nodes which lack the LMASTER capability in the new vnnmap. Unless there are no available connected node that sports the LMASTER capability in which case we let the local node (recmaster) take on the lmaster role temporarily (i.e. become a member of the vnnmap list) commit 8556e9dc897c6b9b9be0b52f391effb1f72fbd80 Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Tue May 6 13:56:56 2008 +1000 make sure we lose all elections for recmaster role if we do not have the recmaster capability. 
(unless there are no other nodes at all available with this capability) commit e513277fb09b951427be8351d04c877e0a15359d Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Tue May 6 13:27:17 2008 +1000 close and reopen the reclock pnn file at regular intervals. handle failure to get/hold the reclock pnn file better and just treat it as a transient backend filesystem error and try again later instead of shutting down the recovery daemon when we have lost the pnn file and if we are recmaster release the recmaster role so that someone else can become recmaster instead commit 7e587acaf8006254e89ff9b4bf48454821c85863 Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Tue May 6 11:19:17 2008 +1000 Monitor that the recovery daemon is still running from the main ctdb daemon and if it has terminated, then we shut down the main daemon as well commit 34b952e4adc53ee82345275a0e28231fa1b2533e Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Tue May 6 10:41:22 2008 +1000 Add ability to disable recmaster and lmaster roles through sysconfig file and command line arguments commit 50f1255ea9ed15bb8fa11cf838b29afa77e857fd Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Tue May 6 10:02:27 2008 +1000 Add a capabilities field to the ctdb structure Define two capabilities : can be recmaster can be lmaster Default both capabilities to YES Update the ctdb tool to read capabilities off a node commit 40c7a536c6b428caef7904a1de860d82a70748af Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Tue May 6 07:57:43 2008 +1000 Use DEBUG_ERR and not DEBUG_WARNING when we get a connection attempt from a non-ctdb host commit 56d9c8b222436814fa39bc583318e6fd8e6c74c3 Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Thu Apr 24 22:06:04 2008 +1000 update version to .34 commit 9395a05de669c69396e701fb36409ec49d3ebef6 Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Thu Apr 24 21:51:08 2008 +1000 when deleting a public ip from a node that is currently hosting this ip, try to move the ip address to a 
different node first commit 8e894d8baf20a455b50c5c1b1ac0540d9e766c5d Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Wed Apr 23 21:49:52 2008 +1000 make 'ctdb catdb' produce output that resembles the output of tdbdump commit a63825e32658b36e0964584758b9a276c18056b8 Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Wed Apr 23 21:05:36 2008 +1000 when adding a new public ip address to a running node using the 'ctdb addip' command, If no other node is hosting this public ip at the moment, then assign it immediately to the current node. commit 4063aed8c6babf02726a1663375ea5d32c423e8c Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Thu Apr 10 14:45:45 2008 +1000 Revert "- accept an optional set of tdb_flags from clients on open a database," This reverts commit 49330f97c78ca0669615297ac3d8498651831214. commit 79ca87e53dc4c1c73c511680d28db644140a326c Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Thu Apr 10 14:57:41 2008 +1000 Revert "Revert "- accept an optional set of tdb_flags from clients on open a database,"" This reverts commit 171d1d71ef9f2373620bd7da3adaecb405338603. commit 7dad1c34f94a433bbb5784cb7156b84bd2e8cd1b Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Thu Apr 10 15:59:51 2008 +1000 Revert "Revert "Revert "- accept an optional set of tdb_flags from clients on open a database,""" remove the transaction stuff and push so that the git tree will work This reverts commit 539bbdd9b0d0346b42e66ef2fcfb16f39bbe098b. 
commit 772052e071718f20a19d24d5e06a5a2ef87549f2 Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Tue Apr 15 18:24:48 2008 +1000 make ctdb eventscript accept the -n all argument to run the event script on all connected nodes commit 42ceac4d7f31470e9d626a1709de79658aebde7e Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Tue Apr 22 00:56:27 2008 +1000 when a node disagrees with us re who is recmaster make it mark that node as a culprit so it eventually gets banned commit cbf79b2158ab21a58aef967e89f0bd60890a7972 Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Wed Apr 23 00:55:57 2008 +1000 add support for -n all in "ctdb -n all ip" this collects all public addresses from all nodes and presents the public ips for the entire cluster commit 0a4e667f42c6fb23be13651f7b0d0a545a49900b Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Wed Apr 23 00:55:57 2008 +1000 add support for -n all in "ctdb -n all ip" this collects all public addresses from all nodes and presents the public ips for the entire cluster commit eff3f326f8ce6070c9f3c430cd14d1b71a8db220 Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Tue Apr 22 00:56:27 2008 +1000 when a node disagrees with us re who is recmaster make it mark that node as a culprit so it eventually gets banned commit 3fad7d67f2c66ac3a65cfd821fd6db6342f4a3f0 Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Tue Apr 15 18:24:48 2008 +1000 make ctdb eventscript accept the -n all argument to run the event script on all connected nodes commit 876d3aca18c27c2239116c8feb6582b3a68c6571 Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Thu Apr 10 15:59:51 2008 +1000 Revert "Revert "Revert "- accept an optional set of tdb_flags from clients on open a database,""" remove the transaction stuff and push so that the git tree will work This reverts commit 539bbdd9b0d0346b42e66ef2fcfb16f39bbe098b. 
commit 539bbdd9b0d0346b42e66ef2fcfb16f39bbe098b Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Thu Apr 10 14:57:41 2008 +1000 Revert "Revert "- accept an optional set of tdb_flags from clients on open a database,"" This reverts commit 171d1d71ef9f2373620bd7da3adaecb405338603. commit 171d1d71ef9f2373620bd7da3adaecb405338603 Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Thu Apr 10 14:45:45 2008 +1000 Revert "- accept an optional set of tdb_flags from clients on open a database," This reverts commit 49330f97c78ca0669615297ac3d8498651831214. ----------------------------------------------------------------------- Summary of changes: client/ctdb_client.c | 83 +++++++++-- common/ctdb_ltdb.c | 20 +--- config/ctdb.init | 6 + config/ctdb.sysconfig | 17 ++ include/ctdb.h | 9 + include/ctdb_private.h | 21 ++- packaging/RPM/ctdb.spec | 32 ++++- server/ctdb_control.c | 7 +- server/ctdb_ltdb_server.c | 13 +-- server/ctdb_recover.c | 18 ++ server/ctdb_recoverd.c | 165 +++++++++++++++++--- server/ctdbd.c | 13 ++ tcp/tcp_connect.c | 2 +- tools/ctdb.c | 386 +++++++++++++++++++++++++++++++++++++++++---- 14 files changed, 694 insertions(+), 98 deletions(-) Changeset truncated at 500 lines: diff --git a/client/ctdb_client.c b/client/ctdb_client.c index f852e5f..921392c 100644 --- a/client/ctdb_client.c +++ b/client/ctdb_client.c @@ -1839,19 +1839,33 @@ int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void * */ static int dumpdb_fn(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *p) { + int i; FILE *f = (FILE *)p; - char *keystr, *datastr; struct ctdb_ltdb_header *h = (struct ctdb_ltdb_header *)data.dptr; - keystr = hex_encode_talloc(ctdb, key.dptr, key.dsize); - datastr = hex_encode_talloc(ctdb, data.dptr+sizeof(*h), data.dsize-sizeof(*h)); - fprintf(f, "dmaster: %u\n", h->dmaster); fprintf(f, "rsn: %llu\n", (unsigned long long)h->rsn); - fprintf(f, "key: %s\ndata: %s\n", keystr, datastr); - talloc_free(keystr); - talloc_free(datastr); + 
fprintf(f, "key(%d) = \"", key.dsize); + for (i=0;i<key.dsize;i++) { + if (isascii(key.dptr[i])) { + fprintf(f, "%c", key.dptr[i]); + } else { + fprintf(f, "\\%02X", key.dptr[i]); + } + } + fprintf(f, "\"\n"); + + fprintf(f, "data(%d) = \"", data.dsize); + for (i=sizeof(*h);i<data.dsize;i++) { + if (isascii(data.dptr[i])) { + fprintf(f, "%c", data.dptr[i]); + } else { + fprintf(f, "\\%02X", data.dptr[i]); + } + } + fprintf(f, "\"\n"); + return 0; } @@ -2657,8 +2671,11 @@ int ctdb_ctrl_end_recovery(struct ctdb_context *ctdb, struct timeval timeout, ui static void async_callback(struct ctdb_client_control_state *state) { struct client_async_data *data = talloc_get_type(state->async.private_data, struct client_async_data); + struct ctdb_context *ctdb = talloc_get_type(state->ctdb, struct ctdb_context); int ret; + TDB_DATA outdata; int32_t res; + uint32_t destnode = state->c->hdr.destnode; /* one more node has responded with recmode data */ data->count--; @@ -2676,13 +2693,16 @@ static void async_callback(struct ctdb_client_control_state *state) state->async.fn = NULL; - ret = ctdb_control_recv(state->ctdb, state, data, NULL, &res, NULL); + ret = ctdb_control_recv(ctdb, state, data, &outdata, &res, NULL); if ((ret != 0) || (res != 0)) { if ( !data->dont_log_errors) { DEBUG(DEBUG_ERR,("Async operation failed with ret=%d res=%d\n", ret, (int)res)); } data->fail_count++; } + if ((ret == 0) && (data->callback != NULL)) { + data->callback(ctdb, destnode, res, outdata); + } } @@ -2725,15 +2745,17 @@ int ctdb_client_async_control(struct ctdb_context *ctdb, uint32_t *nodes, struct timeval timeout, bool dont_log_errors, - TDB_DATA data) + TDB_DATA data, + client_async_callback client_callback) { struct client_async_data *async_data; struct ctdb_client_control_state *state; int j, num_nodes; - + async_data = talloc_zero(ctdb, struct client_async_data); CTDB_NO_MEMORY_FATAL(ctdb, async_data); async_data->dont_log_errors = dont_log_errors; + async_data->callback = client_callback; 
num_nodes = talloc_get_size(nodes) / sizeof(uint32_t); @@ -2857,3 +2879,44 @@ ctdb_read_pnn_lock(int fd, int32_t pnn) return c; } +/* + get capabilities of a remote node + */ +struct ctdb_client_control_state * +ctdb_ctrl_getcapabilities_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode) +{ + return ctdb_control_send(ctdb, destnode, 0, + CTDB_CONTROL_GET_CAPABILITIES, 0, tdb_null, + mem_ctx, &timeout, NULL); +} + +int ctdb_ctrl_getcapabilities_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *capabilities) +{ + int ret; + int32_t res; + TDB_DATA outdata; + + ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL); + if ( (ret != 0) || (res != 0) ) { + DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getcapabilities_recv failed\n")); + return -1; + } + + if (capabilities) { + *capabilities = *((uint32_t *)outdata.dptr); + } + + return 0; +} + +int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *capabilities) +{ + struct ctdb_client_control_state *state; + TALLOC_CTX *tmp_ctx = talloc_new(NULL); + int ret; + + state = ctdb_ctrl_getcapabilities_send(ctdb, tmp_ctx, timeout, destnode); + ret = ctdb_ctrl_getcapabilities_recv(ctdb, tmp_ctx, state, capabilities); + talloc_free(tmp_ctx); + return ret; +} diff --git a/common/ctdb_ltdb.c b/common/ctdb_ltdb.c index d9e4f2a..e8a334a 100644 --- a/common/ctdb_ltdb.c +++ b/common/ctdb_ltdb.c @@ -150,25 +150,7 @@ int ctdb_ltdb_store(struct ctdb_db_context *ctdb_db, TDB_DATA key, memcpy(rec.dptr, header, sizeof(*header)); memcpy(rec.dptr + sizeof(*header), data.dptr, data.dsize); - /* if this is a persistent database without NOSYNC then we - will do this via a transaction */ - if (ctdb_db->persistent && !(ctdb_db->client_tdb_flags & TDB_NOSYNC)) { - ret = tdb_transaction_start(ctdb_db->ltdb->tdb); - if (ret != 0) { - DEBUG(DEBUG_CRIT, ("Failed to start local 
transaction\n")); - goto failed; - } - ret = tdb_store(ctdb_db->ltdb->tdb, key, rec, TDB_REPLACE); - if (ret != 0) { - tdb_transaction_cancel(ctdb_db->ltdb->tdb); - goto failed; - } - ret = tdb_transaction_commit(ctdb_db->ltdb->tdb); - } else { - ret = tdb_store(ctdb_db->ltdb->tdb, key, rec, TDB_REPLACE); - } - -failed: + ret = tdb_store(ctdb_db->ltdb->tdb, key, rec, TDB_REPLACE); talloc_free(rec.dptr); return ret; diff --git a/config/ctdb.init b/config/ctdb.init index bae52c2..922a53d 100755 --- a/config/ctdb.init +++ b/config/ctdb.init @@ -66,6 +66,12 @@ CTDB_OPTIONS="$CTDB_OPTIONS --reclock=$CTDB_RECOVERY_LOCK" [ -z "$CTDB_START_AS_DISABLED" ] || [ "$CTDB_START_AS_DISABLED" != "yes" ] || { CTDB_OPTIONS="$CTDB_OPTIONS --start-as-disabled" } +[ -z "$CTDB_CAPABILITY_RECMASTER" ] || [ "$CTDB_CAPABILITY_RECMASTER" != "no" ] || { + CTDB_OPTIONS="$CTDB_OPTIONS --no-recmaster" +} +[ -z "$CTDB_CAPABILITY_LMASTER" ] || [ "$CTDB_CAPABILITY_LMASTER" != "no" ] || { + CTDB_OPTIONS="$CTDB_OPTIONS --no-lmaster" +} if [ -x /sbin/startproc ]; then init_style="suse" diff --git a/config/ctdb.sysconfig b/config/ctdb.sysconfig index 9d1e434..58edbff 100644 --- a/config/ctdb.sysconfig +++ b/config/ctdb.sysconfig @@ -91,6 +91,23 @@ # the node with "ctdb enable" # CTDB_START_AS_DISABLED="yes" +# LMASTER and RECMASTER capabilities. +# By default all nodes are capable of both being LMASTER for records and +# also for taking the RECMASTER role and perform recovery. +# These parameters can be used to disable these two roles on a node. +# Note: If there are NO available nodes left in a cluster that can perform +# the RECMASTER role, the cluster will not be able to recover from a failure +# and will remain in RECOVERY mode until an RECMASTER capable node becomes +# available. Same for LMASTER. +# These parametersd are useful for scenarios where you have one "remote" node +# in a cluster and you do not want the remote node to be fully participating +# in the cluster and slow things down. 
+# For that case, set both roles to "no" for the remote node on the remote site +# but leave the roles default to "yes" on the primary nodes in the central +# datacentre. +# CTDB_CAPABILITY_RECMASTER=yes +# CTDB_CAPABILITY_LMASTER=yes + # where to log messages # the default is /var/log/log.ctdb # CTDB_LOGFILE=/var/log/log.ctdb diff --git a/include/ctdb.h b/include/ctdb.h index bfba37e..95d3f2f 100644 --- a/include/ctdb.h +++ b/include/ctdb.h @@ -536,4 +536,13 @@ uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb, int ctdb_read_pnn_lock(int fd, int32_t pnn); +/* + get capabilities of a remote node + */ +int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *capabilities); + +struct ctdb_client_control_state *ctdb_ctrl_getcapabilities_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode); + +int ctdb_ctrl_getcapabilities_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *capabilities); + #endif diff --git a/include/ctdb_private.h b/include/ctdb_private.h index d51e2f7..d31b148 100644 --- a/include/ctdb_private.h +++ b/include/ctdb_private.h @@ -199,6 +199,11 @@ struct ctdb_node { uint32_t rx_cnt; uint32_t tx_cnt; + /* used to track node capabilities, is only valid/tracked inside the + recovery daemon. + */ + uint32_t capabilities; + /* a list of controls pending to this node, so we can time them out quickly if the node becomes disconnected */ struct daemon_control_state *pending_controls; @@ -332,6 +337,10 @@ enum ctdb_freeze_mode {CTDB_FREEZE_NONE, CTDB_FREEZE_PENDING, CTDB_FREEZE_FROZEN #define CTDB_MONITORING_ACTIVE 0 #define CTDB_MONITORING_DISABLED 1 +/* The different capabilities of the ctdb daemon. 
*/ +#define CTDB_CAP_RECMASTER 0x00000001 +#define CTDB_CAP_LMASTER 0x00000002 + /* main state of the ctdb daemon */ struct ctdb_context { struct event_context *ev; @@ -356,6 +365,7 @@ struct ctdb_context { uint32_t num_nodes; uint32_t num_connected; unsigned flags; + uint32_t capabilities; struct idr_context *idr; uint16_t idr_cnt; struct ctdb_node **nodes; /* array of nodes in the cluster - indexed by vnn */ @@ -400,7 +410,6 @@ struct ctdb_db_context { struct ctdb_registered_call *calls; /* list of registered calls */ uint32_t seqnum; struct timed_event *te; - uint32_t client_tdb_flags; }; @@ -514,6 +523,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, CTDB_CONTROL_ADD_PUBLIC_IP = 77, CTDB_CONTROL_DEL_PUBLIC_IP = 78, CTDB_CONTROL_RUN_EVENTSCRIPTS = 79, + CTDB_CONTROL_GET_CAPABILITIES = 80, }; /* @@ -911,7 +921,7 @@ int ctdb_daemon_send_control(struct ctdb_context *ctdb, uint32_t destnode, void *private_data); int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata, - TDB_DATA *outdata, uint64_t tdb_flags, bool persistent); + TDB_DATA *outdata, bool persistent); int ctdb_daemon_set_call(struct ctdb_context *ctdb, uint32_t db_id, ctdb_fn_t fn, int id); @@ -1271,10 +1281,13 @@ int32_t ctdb_monitoring_mode(struct ctdb_context *ctdb); int ctdb_set_child_logging(struct ctdb_context *ctdb); +typedef void (*client_async_callback)(struct ctdb_context *ctdb, uint32_t node_pnn, int32_t res, TDB_DATA outdata); + struct client_async_data { bool dont_log_errors; uint32_t count; uint32_t fail_count; + client_async_callback callback; }; void ctdb_client_async_add(struct client_async_data *data, struct ctdb_client_control_state *state); int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *data); @@ -1283,12 +1296,14 @@ int ctdb_client_async_control(struct ctdb_context *ctdb, uint32_t *nodes, struct timeval timeout, bool dont_log_errors, - TDB_DATA data); + TDB_DATA data, + client_async_callback client_callback); void 
ctdb_load_nodes_file(struct ctdb_context *ctdb); int ctdb_control_reload_nodes_file(struct ctdb_context *ctdb, uint32_t opcode); int32_t ctdb_dump_memory(struct ctdb_context *ctdb, TDB_DATA *outdata); +int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outdata); #endif diff --git a/packaging/RPM/ctdb.spec b/packaging/RPM/ctdb.spec index 6974739..c0ed54c 100644 --- a/packaging/RPM/ctdb.spec +++ b/packaging/RPM/ctdb.spec @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team <[EMAIL PROTECTED]> Name: ctdb Version: 1.0 -Release: 33 +Release: 35 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons @@ -120,6 +120,36 @@ fi %{_includedir}/ctdb_private.h %changelog +* Wed May 7 2008 : Version 1.0.35 + - During recovery, when we define the new set of lmasters (vnnmap) + only consider those nodes that have the can-be-lmaster capability + when we create the vnnmap. unless there are no nodes available which + supports this capability in which case we allow the recmaster to + become lmaster capable (temporarily). + - Extend the async framework so that we can use paralell async calls + to controls that return data. + - If we do not have the "can be recmaster" capability, make sure we will + lose any recmaster elections, unless there are no nodes available that + have the capability, in which case we "take/win" the election anyway. + - Close and reopen the reclock pnn file at regular intervals. + Make it a non-fatal event if we occasionally fail to open/read/write + to this file. + - Monitor that the recovery daemon is still running from the main ctdb + daemon and shutdown the main daemon when recovery daemon has terminated. + - Add a "ctdb getcapabilities" command to read the capabilities off a node. + - Define two new capabilities : can be recmaster and can be lmaster + and default both capabilities to YES. 
+ - Log denied tcp connection attempts with DEBUG_ERR and not DEBUG_WARNING +* Thu Apr 24 2008 : Version 1.0.34 + - When deleting a public ip from a node, try to migrate the ip to a different + node first. + - Change catdb to produce output similar to tdbdump + - When adding a new public ip address, if this ip does not exist yet in + the cluster, then grab the ip on the local node and activate it. + - When a node disagrees with the recmaster on WHO is the recmaster, then + mark that node as a recovery culprit so it will eventually become + banned. + - Make ctdb eventscript support the -n all argument. * Thu Apr 10 2008 : Version 1.0.33 - Add facilities to include site local adaptations to the eventscript by /etc/ctdb/rc.local which will be read by all eventscripts. diff --git a/server/ctdb_control.c b/server/ctdb_control.c index 4de2730..6c8a4fc 100644 --- a/server/ctdb_control.c +++ b/server/ctdb_control.c @@ -206,10 +206,10 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, } case CTDB_CONTROL_DB_ATTACH: - return ctdb_control_db_attach(ctdb, indata, outdata, srvid, false); + return ctdb_control_db_attach(ctdb, indata, outdata, false); case CTDB_CONTROL_DB_ATTACH_PERSISTENT: - return ctdb_control_db_attach(ctdb, indata, outdata, srvid, true); + return ctdb_control_db_attach(ctdb, indata, outdata, true); case CTDB_CONTROL_SET_CALL: { struct ctdb_control_set_call *sc = @@ -389,6 +389,9 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, case CTDB_CONTROL_DEL_PUBLIC_IP: return ctdb_control_del_public_address(ctdb, indata); + case CTDB_CONTROL_GET_CAPABILITIES: + return ctdb_control_get_capabilities(ctdb, outdata); + default: DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode)); return -1; diff --git a/server/ctdb_ltdb_server.c b/server/ctdb_ltdb_server.c index 5146ed8..e900f7b 100644 --- a/server/ctdb_ltdb_server.c +++ b/server/ctdb_ltdb_server.c @@ -296,19 +296,12 @@ static int ctdb_local_attach(struct 
ctdb_context *ctdb, const char *db_name, boo a client has asked to attach a new database */ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata, - TDB_DATA *outdata, uint64_t tdb_flags, - bool persistent) + TDB_DATA *outdata, bool persistent) { const char *db_name = (const char *)indata.dptr; struct ctdb_db_context *db; struct ctdb_node *node = ctdb->nodes[ctdb->pnn]; - /* the client can optionally pass additional tdb flags, but we - only allow a subset of those on the database in ctdb. Note - that tdb_flags is passed in via the (otherwise unused) - srvid to the attach control */ - tdb_flags &= TDB_NOSYNC; - /* If the node is inactive it is not part of the cluster and we should not allow clients to attach to any databases @@ -324,7 +317,6 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata, if (db) { outdata->dptr = (uint8_t *)&db->db_id; outdata->dsize = sizeof(db->db_id); - db->client_tdb_flags |= tdb_flags; return 0; } @@ -338,9 +330,6 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata, return -1; } - /* remember the flags the client has specified */ - db->client_tdb_flags = tdb_flags; - outdata->dptr = (uint8_t *)&db->db_id; outdata->dsize = sizeof(db->db_id); diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c index 83e5424..7a96733 100644 --- a/server/ctdb_recover.c +++ b/server/ctdb_recover.c @@ -957,3 +957,21 @@ int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA inda return 0; } + +/* + report capabilities + */ +int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outdata) +{ + uint32_t *capabilities = NULL; + + capabilities = talloc(outdata, uint32_t); + CTDB_NO_MEMORY(ctdb, capabilities); + *capabilities = ctdb->capabilities; + + outdata->dsize = sizeof(uint32_t); + outdata->dptr = (uint8_t *)capabilities; + + return 0; +} + diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c index 0d1ef02..c3dff32 100644 --- 
a/server/ctdb_recoverd.c +++ b/server/ctdb_recoverd.c @@ -212,7 +212,7 @@ static int run_recovered_eventscript(struct ctdb_context *ctdb, struct ctdb_node if (ctdb_client_async_control(ctdb, CTDB_CONTROL_END_RECOVERY, list_of_active_nodes(ctdb, nodemap, tmp_ctx, true), - CONTROL_TIMEOUT(), false, tdb_null) != 0) { + CONTROL_TIMEOUT(), false, tdb_null, NULL) != 0) { DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'recovered' event. Recovery failed.\n")); talloc_free(tmp_ctx); return -1; @@ -234,7 +234,7 @@ static int run_startrecovery_eventscript(struct ctdb_context *ctdb, struct ctdb_ if (ctdb_client_async_control(ctdb, CTDB_CONTROL_START_RECOVERY, list_of_active_nodes(ctdb, nodemap, tmp_ctx, true), - CONTROL_TIMEOUT(), false, tdb_null) != 0) { + CONTROL_TIMEOUT(), false, tdb_null, NULL) != 0) { DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'startrecovery' event. Recovery failed.\n")); talloc_free(tmp_ctx); return -1; @@ -244,6 +244,40 @@ static int run_startrecovery_eventscript(struct ctdb_context *ctdb, struct ctdb_ return 0; } +static void async_getcap_callback(struct ctdb_context *ctdb, uint32_t node_pnn, int32_t res, TDB_DATA outdata) +{ + if ( (outdata.dsize != sizeof(uint32_t)) || (outdata.dptr == NULL) ) { + DEBUG(DEBUG_ERR, (__location__ " Invalid lenght/pointer for getcap callback : %d %p\n", outdata.dsize, outdata.dptr)); -- CTDB repository