------------------------------------------------------------ revno: 320 revision-id: [EMAIL PROTECTED] parent: [EMAIL PROTECTED] committer: Andrew Tridgell <[EMAIL PROTECTED]> branch nick: tridge timestamp: Fri 2007-05-18 23:48:29 +1000 message: time out pending controls immediately when a node becomes disconnected modified: common/ctdb.c ctdb.c-20061127094323-t50f58d65iaao5of-2 common/ctdb_daemon.c ctdb_daemon.c-20070409200331-3el1kqgdb9m4ib0g-1 common/ctdb_monitor.c ctdb_monitor.c-20070518100625-8jf4ft1mjzmb22ck-1 include/ctdb_private.h ctdb_private.h-20061117234101-o3qt14umlg9en8z0-13 === modified file 'common/ctdb.c' --- a/common/ctdb.c 2007-05-18 13:23:36 +0000 +++ b/common/ctdb.c 2007-05-18 13:48:29 +0000 @@ -379,6 +379,7 @@ node->flags &= ~NODE_FLAGS_CONNECTED; DEBUG(1,("%s: node %s is dead: %d connected\n", node->ctdb->name, node->name, node->ctdb->num_connected)); + ctdb_daemon_cancel_controls(node->ctdb, node); } /*
=== modified file 'common/ctdb_daemon.c' --- a/common/ctdb_daemon.c 2007-05-18 09:19:35 +0000 +++ b/common/ctdb_daemon.c 2007-05-18 13:48:29 +0000 @@ -836,16 +836,18 @@ struct daemon_control_state { + struct daemon_control_state *next, *prev; struct ctdb_client *client; struct ctdb_req_control *c; uint32_t reqid; + struct ctdb_node *node; }; /* callback when a control reply comes in */ static void daemon_control_callback(struct ctdb_context *ctdb, - uint32_t status, TDB_DATA data, + int32_t status, TDB_DATA data, const char *errormsg, void *private_data) { @@ -880,6 +882,30 @@ } /* + fail all pending controls to a disconnected node + */ +void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node) +{ + struct daemon_control_state *state; + while ((state = node->pending_controls)) { + DLIST_REMOVE(node->pending_controls, state); + daemon_control_callback(ctdb, (uint32_t)-1, tdb_null, + "node is disconnected", state); + } +} + +/* + destroy a daemon_control_state + */ +static int daemon_control_destructor(struct daemon_control_state *state) +{ + if (state->node) { + DLIST_REMOVE(state->node->pending_controls, state); + } + return 0; +} + +/* this is called when the ctdb daemon received a ctdb request control from a local client over the unix domain socket */ @@ -900,6 +926,14 @@ state->client = client; state->c = talloc_steal(state, c); state->reqid = c->hdr.reqid; + if (ctdb_validate_vnn(client->ctdb, c->hdr.destnode)) { + state->node = client->ctdb->nodes[c->hdr.destnode]; + DLIST_ADD(state->node->pending_controls, state); + } else { + state->node = NULL; + } + + talloc_set_destructor(state, daemon_control_destructor); data.dptr = &c->data[0]; data.dsize = c->datalen; @@ -912,6 +946,10 @@ DEBUG(0,(__location__ " Failed to send control to remote node %u\n", c->hdr.destnode)); } + + if (c->flags & CTDB_CTRL_FLAG_NOREPLY) { + talloc_free(state); + } } /* === modified file 'common/ctdb_monitor.c' --- a/common/ctdb_monitor.c 2007-05-18 13:23:36 
+0000 +++ b/common/ctdb_monitor.c 2007-05-18 13:48:29 +0000 @@ -58,6 +58,7 @@ if (node->dead_count >= CTDB_MONITORING_DEAD_COUNT) { DEBUG(0,("Node %u is dead - marking as not connected\n", node->vnn)); node->flags &= ~NODE_FLAGS_CONNECTED; + ctdb_daemon_cancel_controls(ctdb, node); /* maybe tell the transport layer to kill the sockets as well? */ === modified file 'include/ctdb_private.h' --- a/include/ctdb_private.h 2007-05-18 13:23:36 +0000 +++ b/include/ctdb_private.h 2007-05-18 13:48:29 +0000 @@ -74,7 +74,7 @@ /* used for callbacks in ctdb_control requests */ typedef void (*ctdb_control_callback_fn_t)(struct ctdb_context *, - uint32_t status, TDB_DATA data, + int32_t status, TDB_DATA data, const char *errormsg, void *private_data); @@ -93,6 +93,10 @@ /* used by the dead node monitoring */ uint32_t dead_count; uint32_t rx_cnt; + + /* a list of controls pending to this node, so we can time them out quickly + if the node becomes disconnected */ + struct daemon_control_state *pending_controls; }; /* @@ -823,4 +827,6 @@ int ctdb_start_monitoring(struct ctdb_context *ctdb); void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode); +void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node); + #endif