The branch, 1.2.40 has been updated via 1520ec0262385894c086740f8486b18f29e3fb80 (commit) via c9a55cacc0c41ba95360849a13b987d96cd03731 (commit) via 59b9d58655319b515bc20de2529bfec127b4c3ab (commit) via 28602eeb613030c49d32156f57614c059b136105 (commit) via 8909931f7dd067e5319a0db15fcb6cb0c335f903 (commit) via 865fec8f04a6b8f7416903c4d02c188bd39b9683 (commit) from d788857b1c96f78b0ffa4d410a935ec59bef9148 (commit)
http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=1.2.40 - Log ----------------------------------------------------------------- commit 1520ec0262385894c086740f8486b18f29e3fb80 Author: Amitay Isaacs <ami...@gmail.com> Date: Wed Mar 6 17:48:44 2013 +1100 New Version 1.2.59 Signed-off-by: Amitay Isaacs <ami...@gmail.com> commit c9a55cacc0c41ba95360849a13b987d96cd03731 Author: Amitay Isaacs <ami...@gmail.com> Date: Mon Feb 18 18:05:28 2013 +1100 ctdbd: Exec lockwait helper for locking a record Signed-off-by: Amitay Isaacs <ami...@gmail.com> commit 59b9d58655319b515bc20de2529bfec127b4c3ab Author: Amitay Isaacs <ami...@gmail.com> Date: Mon Feb 18 18:04:07 2013 +1100 ctdbd: Create a standalone helper for record locking Signed-off-by: Amitay Isaacs <ami...@gmail.com> commit 28602eeb613030c49d32156f57614c059b136105 Author: Stefan Metzmacher <me...@samba.org> Date: Fri Feb 22 12:45:39 2013 +0100 tevent: optimize adding new timer events There are two cases: 1. Adding a timer with a zero timestamp. Such events were used before we had immediate events. It's likely that there are a lot of these events and we need to add new ones in FIFO order. 2. Adding a timer with a real timestamp. As these timestamps typically get higher :-) it's better to traverse the existing list from the tail. This is not completely optimal, but it should be better than before. Signed-off-by: Stefan Metzmacher <me...@samba.org> commit 8909931f7dd067e5319a0db15fcb6cb0c335f903 Author: Amitay Isaacs <ami...@gmail.com> Date: Fri Feb 22 12:59:39 2013 +1100 common/io: For scheduling immediate events use tevent_schedule_immediate tevent_schedule_immediate() is much more efficient at handling events that need to be processed immediately than creating timed events with timeval_zero(). 
Signed-off-by: Amitay Isaacs <ami...@gmail.com> Cherry-pick-from: 11734be353a1e246163eda631d35dfe55d1d6fb1 commit 865fec8f04a6b8f7416903c4d02c188bd39b9683 Author: Amitay Isaacs <ami...@gmail.com> Date: Thu Feb 21 13:16:15 2013 +1100 ctdbd: Add an index db for message list for faster searches When CTDB is busy with lots of smbd, CTDB was spending too much time in daemon_check_srvids() which searches a list of srvids in the registered message handlers. Using a hash based index significantly improves the performance of search in a linked list. Signed-off-by: Amitay Isaacs <ami...@gmail.com> Cherry-pick-from: 3e09f25d419635f6dd679b48fa65370f7860be7d ----------------------------------------------------------------------- Summary of changes: Makefile.in | 9 ++- common/ctdb_io.c | 26 +++-- common/ctdb_message.c | 205 ++++++++++++++++++++++++++++++++++++++--- include/ctdb_private.h | 13 ++- lib/tevent/tevent.c | 1 + lib/tevent/tevent_internal.h | 1 + lib/tevent/tevent_timed.c | 62 +++++++++++-- packaging/RPM/ctdb.spec.in | 10 ++- server/ctdb_daemon.c | 8 +-- server/ctdb_lockwait.c | 105 +++++++++++++++++++--- server/ctdb_lockwait_helper.c | 177 +++++++++++++++++++++++++++++++++++ 11 files changed, 559 insertions(+), 58 deletions(-) create mode 100644 server/ctdb_lockwait_helper.c Changeset truncated at 500 lines: diff --git a/Makefile.in b/Makefile.in index 849abd7..9c4f555 100755 --- a/Makefile.in +++ b/Makefile.in @@ -32,7 +32,7 @@ POPT_OBJ = @POPT_OBJ@ CFLAGS=-g -I$(srcdir)/include -Iinclude -Ilib -Ilib/util -I$(srcdir) \ -I@tallocdir@ -I@tdbdir@/include -I@libreplacedir@ \ -DVARDIR=\"$(localstatedir)\" -DETCDIR=\"$(etcdir)\" \ - -DLOGDIR=\"$(logdir)\" \ + -DLOGDIR=\"$(logdir)\" -DBINDIR=\"$(bindir)\" \ -DUSE_MMAP=1 @CFLAGS@ $(POPT_CFLAGS) LIB_FLAGS=@LDFLAGS@ -Llib @LIBS@ $(POPT_LIBS) @INFINIBAND_LIBS@ @CTDB_PCAP_LDFLAGS@ @@ -75,7 +75,7 @@ TEST_BINS=tests/bin/ctdb_bench tests/bin/ctdb_fetch tests/bin/ctdb_fetch_one \ tests/bin/ctdb_takeover_tests 
tests/bin/ctdb_update_record \ @INFINIBAND_BINS@ -BINS = bin/ctdb @CTDB_SCSI_IO@ bin/smnotify bin/ping_pong bin/ltdbtool +BINS = bin/ctdb @CTDB_SCSI_IO@ bin/smnotify bin/ping_pong bin/ltdbtool bin/ctdb_lockwait_helper SBINS = bin/ctdbd DIRS = lib bin tests/bin @@ -112,6 +112,10 @@ bin/ctdbd: $(CTDB_SERVER_OBJ) @echo Linking $@ @$(CC) $(CFLAGS) -o $@ $(CTDB_SERVER_OBJ) $(LIB_FLAGS) +bin/ctdb_lockwait_helper: server/ctdb_lockwait_helper.o @TDB_OBJ@ + @echo Linking $@ + @$(CC) $(CFLAGS) -o $@ $^ $(LIB_FLAGS) + libctdb/libctdb.a: $(CTDB_LIB_OBJ) @echo Linking $@ -rm -f libctdb.a @@ -255,6 +259,7 @@ install: all ${INSTALLCMD} -m 755 bin/smnotify $(DESTDIR)$(bindir) $(INSTALLCMD) -m 755 bin/ping_pong $(DESTDIR)$(bindir) $(INSTALLCMD) -m 755 bin/ltdbtool $(DESTDIR)$(bindir) + $(INSTALLCMD) -m 755 bin/ctdb_lockwait_helper $(DESTDIR)$(bindir) $(INSTALLCMD) -m 755 libctdb/libctdb.a $(DESTDIR)$(libdir) ${INSTALLCMD} -m 644 include/ctdb.h $(DESTDIR)$(includedir) ${INSTALLCMD} -m 644 include/ctdb_client.h $(DESTDIR)$(includedir) diff --git a/common/ctdb_io.c b/common/ctdb_io.c index 2a12a18..4e164d9 100644 --- a/common/ctdb_io.c +++ b/common/ctdb_io.c @@ -46,6 +46,7 @@ struct ctdb_queue_pkt { struct ctdb_queue { struct ctdb_context *ctdb; + struct tevent_immediate *im; struct ctdb_buffer buffer; /* input buffer */ struct ctdb_queue_pkt *out_queue, *out_queue_tail; uint32_t out_queue_length; @@ -82,8 +83,8 @@ static void dump_packet(unsigned char *data, size_t len) static void queue_process(struct ctdb_queue *queue); -static void queue_process_event(struct event_context *ev, struct timed_event *te, - struct timeval t, void *private_data) +static void queue_process_event(struct tevent_context *ev, struct tevent_immediate *im, + void *private_data) { struct ctdb_queue *queue = talloc_get_type(private_data, struct ctdb_queue); @@ -134,9 +135,9 @@ static void queue_process(struct ctdb_queue *queue) queue->buffer.length -= pkt_size; if (queue->buffer.length > 0) { - /* There is more 
data to be processed, setup timed event */ - event_add_timed(queue->ctdb->ev, queue, timeval_zero(), - queue_process_event, queue); + /* There is more data to be processed, schedule an event */ + tevent_schedule_immediate(queue->im, queue->ctdb->ev, + queue_process_event, queue); } /* It is the responsibility of the callback to free 'data' */ @@ -202,8 +203,8 @@ failed: /* used when an event triggers a dead queue */ -static void queue_dead(struct event_context *ev, struct timed_event *te, - struct timeval t, void *private_data) +static void queue_dead(struct event_context *ev, struct tevent_immediate *im, + void *private_data) { struct ctdb_queue *queue = talloc_get_type(private_data, struct ctdb_queue); queue->callback(NULL, 0, queue->private_data); @@ -234,8 +235,8 @@ static void queue_io_write(struct ctdb_queue *queue) talloc_free(queue->fde); queue->fde = NULL; queue->fd = -1; - event_add_timed(queue->ctdb->ev, queue, timeval_zero(), - queue_dead, queue); + tevent_schedule_immediate(queue->im, queue->ctdb->ev, + queue_dead, queue); return; } if (n <= 0) return; @@ -301,8 +302,8 @@ int ctdb_queue_send(struct ctdb_queue *queue, uint8_t *data, uint32_t length) talloc_free(queue->fde); queue->fde = NULL; queue->fd = -1; - event_add_timed(queue->ctdb->ev, queue, timeval_zero(), - queue_dead, queue); + tevent_schedule_immediate(queue->im, queue->ctdb->ev, + queue_dead, queue); /* yes, we report success, as the dead node is handled via a separate event */ return 0; @@ -412,6 +413,9 @@ struct ctdb_queue *ctdb_queue_setup(struct ctdb_context *ctdb, va_end(ap); CTDB_NO_MEMORY_NULL(ctdb, queue->name); + queue->im= tevent_create_immediate(queue); + CTDB_NO_MEMORY_NULL(ctdb, queue->im); + queue->ctdb = ctdb; queue->fd = fd; queue->alignment = alignment; diff --git a/common/ctdb_message.c b/common/ctdb_message.c index 03a4b55..c6506f4 100644 --- a/common/ctdb_message.c +++ b/common/ctdb_message.c @@ -2,6 +2,7 @@ ctdb_message protocol code Copyright (C) Andrew Tridgell 2007 + 
Copyright (C) Amitay Isaacs 2013 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,16 +29,103 @@ #include "../include/ctdb_private.h" #include "lib/util/dlinklist.h" +static int message_list_db_init(struct ctdb_context *ctdb) +{ + ctdb->message_list_indexdb = tdb_open("messagedb", 8192, + TDB_INTERNAL|TDB_DISALLOW_NESTING, + O_RDWR|O_CREAT, 0); + if (ctdb->message_list_indexdb == NULL) { + DEBUG(DEBUG_ERR, ("Failed to create message list indexdb\n")); + return -1; + } + + return 0; +} + +static int message_list_db_add(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data) +{ + int ret; + + if (ctdb->message_list_indexdb == NULL) { + ret = message_list_db_init(ctdb); + if (ret < 0) { + return -1; + } + } + + ret = tdb_store(ctdb->message_list_indexdb, key, data, TDB_INSERT); + if (ret < 0) { + DEBUG(DEBUG_ERR, ("Failed to add message list handler (%s)\n", + tdb_errorstr(ctdb->message_list_indexdb))); + return -1; + } + + return 0; +} + +static int message_list_db_delete(struct ctdb_context *ctdb, TDB_DATA key) +{ + int ret; + + if (ctdb->message_list_indexdb == NULL) { + return -1; + } + + ret = tdb_delete(ctdb->message_list_indexdb, key); + if (ret < 0) { + DEBUG(DEBUG_ERR, ("Failed to delete message list handler (%s)\n", + tdb_errorstr(ctdb->message_list_indexdb))); + return -1; + } + + return 0; +} + +static int message_list_db_fetch(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA *data) +{ + if (ctdb->message_list_indexdb == NULL) { + return -1; + } + + *data = tdb_fetch(ctdb->message_list_indexdb, key); + if (data->dsize == 0) { + return -1; + } + return 0; +} + /* this dispatches the messages to the registered ctdb message handler */ int ctdb_dispatch_message(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data) { - struct ctdb_message_list *ml; + struct ctdb_message_list_header *h; + struct ctdb_message_list *m; + TDB_DATA key, hdata; + uint64_t srvid_all = 
CTDB_SRVID_ALL; + int ret; + + key.dptr = (uint8_t *)&srvid; + key.dsize = sizeof(uint64_t); + + ret = message_list_db_fetch(ctdb, key, &hdata); + if (ret == 0) { + h = *(struct ctdb_message_list_header **)hdata.dptr; - for (ml=ctdb->message_list;ml;ml=ml->next) { - if (ml->srvid == srvid || ml->srvid == CTDB_SRVID_ALL) { - ml->message_handler(ctdb, srvid, data, ml->message_private); + for (m=h->m; m; m=m->next) { + m->message_handler(ctdb, srvid, data, m->message_private); + } + } + + key.dptr = (uint8_t *)&srvid_all; + key.dsize = sizeof(uint64_t); + + ret = message_list_db_fetch(ctdb, key, &hdata); + if (ret == 0) { + h = *(struct ctdb_message_list_header **)hdata.dptr; + + for(m=h->m; m; m=m->next) { + m->message_handler(ctdb, srvid, data, m->message_private); } } @@ -58,13 +146,37 @@ void ctdb_request_message(struct ctdb_context *ctdb, struct ctdb_req_header *hdr ctdb_dispatch_message(ctdb, c->srvid, data); } +/* + * When header is freed, remove all the srvid handlers + */ +static int message_header_destructor(struct ctdb_message_list_header *h) +{ + struct ctdb_message_list *m; + TDB_DATA key; + + while (h->m != NULL) { + m = h->m; + DLIST_REMOVE(h->m, m); + TALLOC_FREE(m); + } + + key.dptr = (uint8_t *)&h->srvid; + key.dsize = sizeof(uint64_t); + + message_list_db_delete(h->ctdb, key); + DLIST_REMOVE(h->ctdb->message_list_header, h); + + return 0; +} /* when a client goes away, we need to remove its srvid handler from the list */ static int message_handler_destructor(struct ctdb_message_list *m) { - DLIST_REMOVE(m->ctdb->message_list, m); + struct ctdb_message_list_header *h = m->h; + + DLIST_REMOVE(h->m, m); return 0; } @@ -77,20 +189,47 @@ int ctdb_register_message_handler(struct ctdb_context *ctdb, ctdb_msg_fn_t handler, void *private_data) { + struct ctdb_message_list_header *h; struct ctdb_message_list *m; + TDB_DATA key, data; + int ret; - m = talloc(mem_ctx, struct ctdb_message_list); + m = talloc_zero(mem_ctx, struct ctdb_message_list); 
CTDB_NO_MEMORY(ctdb, m); - m->ctdb = ctdb; - m->srvid = srvid; m->message_handler = handler; m->message_private = private_data; - - DLIST_ADD(ctdb->message_list, m); - talloc_set_destructor(m, message_handler_destructor); + key.dptr = (uint8_t *)&srvid; + key.dsize = sizeof(uint64_t); + + ret = message_list_db_fetch(ctdb, key, &data); + if (ret < 0) { + /* srvid not registered yet */ + h = talloc_zero(ctdb, struct ctdb_message_list_header); + CTDB_NO_MEMORY(ctdb, h); + + h->ctdb = ctdb; + h->srvid = srvid; + + data.dptr = (uint8_t *)&h; + data.dsize = sizeof(struct ctdb_message_list_header *); + ret = message_list_db_add(ctdb, key, data); + if (ret < 0) { + talloc_free(m); + talloc_free(h); + return -1; + } + DLIST_ADD(ctdb->message_list_header, h); + talloc_set_destructor(h, message_header_destructor); + } else { + h = *(struct ctdb_message_list_header **)data.dptr; + } + + m->h = h; + DLIST_ADD(h->m, m); + talloc_set_destructor(m, message_handler_destructor); return 0; } @@ -100,13 +239,53 @@ int ctdb_register_message_handler(struct ctdb_context *ctdb, */ int ctdb_deregister_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data) { + struct ctdb_message_list_header *h; struct ctdb_message_list *m; + TDB_DATA key, data; + int ret; + + key.dptr = (uint8_t *)&srvid; + key.dsize = sizeof(uint64_t); + + ret = message_list_db_fetch(ctdb, key, &data); + if (ret < 0) { + return -1; + } - for (m=ctdb->message_list;m;m=m->next) { - if (m->srvid == srvid && m->message_private == private_data) { + h = *(struct ctdb_message_list_header **)data.dptr; + for (m=h->m; m; m=m->next) { + if (m->message_private == private_data) { talloc_free(m); + if (h->m == NULL) { + talloc_free(h); + } return 0; } } + return -1; } + + +/* + * check if the given srvid exists + */ +bool ctdb_check_message_handler(struct ctdb_context *ctdb, uint64_t srvid) +{ + struct ctdb_message_list_header *h; + TDB_DATA key, data; + + key.dptr = (uint8_t *)&srvid; + key.dsize = 
sizeof(uint64_t); + + if (message_list_db_fetch(ctdb, key, &data) < 0) { + return false; + } + + h = *(struct ctdb_message_list_header **)data.dptr; + if (h->m == NULL) { + return false; + } + + return true; +} diff --git a/include/ctdb_private.h b/include/ctdb_private.h index 6f97702..0eef0e3 100644 --- a/include/ctdb_private.h +++ b/include/ctdb_private.h @@ -270,10 +270,15 @@ struct ctdb_upcalls { /* list of message handlers - needs to be changed to a more efficient data structure so we can find a message handler given a srvid quickly */ -struct ctdb_message_list { +struct ctdb_message_list_header { + struct ctdb_message_list_header *next, *prev; struct ctdb_context *ctdb; - struct ctdb_message_list *next, *prev; uint64_t srvid; + struct ctdb_message_list *m; +}; +struct ctdb_message_list { + struct ctdb_message_list *next, *prev; + struct ctdb_message_list_header *h; ctdb_msg_fn_t message_handler; void *message_private; }; @@ -451,7 +456,8 @@ struct ctdb_context { const struct ctdb_upcalls *upcalls; /* transport upcalls */ void *private_data; /* private to transport */ struct ctdb_db_context *db_list; - struct ctdb_message_list *message_list; + struct ctdb_message_list_header *message_list_header; + struct tdb_context *message_list_indexdb; struct ctdb_daemon_data daemon; struct ctdb_statistics statistics; struct ctdb_statistics statistics_current; @@ -971,6 +977,7 @@ int32_t ctdb_control_traverse_kill(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata, uint32_t srcnode); int ctdb_dispatch_message(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data); +bool ctdb_check_message_handler(struct ctdb_context *ctdb, uint64_t srvid); int daemon_register_message_handler(struct ctdb_context *ctdb, uint32_t client_id, uint64_t srvid); int ctdb_deregister_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data); diff --git a/lib/tevent/tevent.c b/lib/tevent/tevent.c index 5eec5cc..2d9e8d8 100644 --- a/lib/tevent/tevent.c +++ 
b/lib/tevent/tevent.c @@ -162,6 +162,7 @@ int tevent_common_context_destructor(struct tevent_context *ev) DLIST_REMOVE(ev->fd_events, fd); } + ev->last_zero_timer = NULL; for (te = ev->timer_events; te; te = tn) { tn = te->next; te->event_ctx = NULL; diff --git a/lib/tevent/tevent_internal.h b/lib/tevent/tevent_internal.h index ba03bc5..38e627a 100644 --- a/lib/tevent/tevent_internal.h +++ b/lib/tevent/tevent_internal.h @@ -228,6 +228,7 @@ struct tevent_context { /* list of timed events - used by common code */ struct tevent_timer *timer_events; + struct tevent_timer *last_zero_timer; /* list of immediate events - used by common code */ struct tevent_immediate *immediate_events; diff --git a/lib/tevent/tevent_timed.c b/lib/tevent/tevent_timed.c index 457ef1c..075d6dc 100644 --- a/lib/tevent/tevent_timed.c +++ b/lib/tevent/tevent_timed.c @@ -133,13 +133,18 @@ struct timeval tevent_timeval_current_ofs(uint32_t secs, uint32_t usecs) */ static int tevent_common_timed_destructor(struct tevent_timer *te) { + if (te->event_ctx == NULL) { + return 0; + } + tevent_debug(te->event_ctx, TEVENT_DEBUG_TRACE, "Destroying timer event %p \"%s\"\n", te, te->handler_name); - if (te->event_ctx) { - DLIST_REMOVE(te->event_ctx->timer_events, te); + if (te->event_ctx->last_zero_timer == te) { + te->event_ctx->last_zero_timer = DLIST_PREV(te); } + DLIST_REMOVE(te->event_ctx->timer_events, te); return 0; } @@ -160,7 +165,8 @@ struct tevent_timer *tevent_common_add_timer(struct tevent_context *ev, TALLOC_C const char *handler_name, const char *location) { - struct tevent_timer *te, *last_te, *cur_te; + struct tevent_timer *te; + struct tevent_timer *prev_te = NULL; te = talloc(mem_ctx?mem_ctx:ev, struct tevent_timer); if (te == NULL) return NULL; @@ -173,18 +179,53 @@ struct tevent_timer *tevent_common_add_timer(struct tevent_context *ev, TALLOC_C te->location = location; te->additional_data = NULL; + if (ev->timer_events == NULL) { + ev->last_zero_timer = NULL; + } + /* keep the list 
ordered */ - last_te = NULL; - for (cur_te = ev->timer_events; cur_te; cur_te = cur_te->next) { - /* if the new event comes before the current one break */ -- CTDB repository