The branch, master has been updated
       via  83542d9... s3: Slightly increase parallelism in g_lock
       via  be919d6... s3: Avoid starving locks when many processes die at the same time
       via  725b365... s3: Avoid a thundering herd in g_lock_unlock
       via  07978bd... s3: Optimize g_lock_lock for a heavily contended case
       via  f3bdb16... s3: Fix handling of processes that died in g_lock
      from  eda16f2... s4-kcc: remove a qsort() that snuck into the new topology code
http://gitweb.samba.org/?p=samba.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 83542d973ca771353109c7da4b0391d6ba910f53
Author: Volker Lendecke <v...@samba.org>
Date:   Tue Feb 16 12:31:58 2010 +0100

    s3: Slightly increase parallelism in g_lock

    There's no need to still hold the g_lock tdb-level lock while telling
    the waiters to retry

commit be919d6faed198cdc29322a4d9491946c0b044b3
Author: Volker Lendecke <v...@samba.org>
Date:   Tue Feb 16 12:28:53 2010 +0100

    s3: Avoid starving locks when many processes die at the same time

    In g_lock_unlock we have a little race between the process_exists and
    messaging_send call: We only send to 5 waiters now, they all might
    have died between us checking their existence and sending the
    message. This change makes g_lock_lock retry at least once every
    minute.

commit 725b3654f831fbe0388cc09f46269903c9eef1d7
Author: Volker Lendecke <v...@samba.org>
Date:   Tue Feb 16 12:22:08 2010 +0100

    s3: Avoid a thundering herd in g_lock_unlock

    Only notify the first 5 pending lock waiters. This avoids a
    thundering herd problem that is really nasty in a cluster. It also
    makes acquiring a lock a bit more FIFO, lock waiters are added to
    the end of the array.

commit 07978bd175395e0dc770f68fff5b8bd8b0fdeb51
Author: Volker Lendecke <v...@samba.org>
Date:   Mon Feb 15 16:57:16 2010 +0100

    s3: Optimize g_lock_lock for a heavily contended case

    Only check the existence of the lock owner in g_lock_parse, check the
    rest of the records only when we got the lock successfully. This
    reduces the load on process_exists which can involve a network
    roundtrip in the clustered case.

commit f3bdb163f461175c50b4930fa3464beaee30f4a8
Author: Volker Lendecke <v...@samba.org>
Date:   Mon Feb 15 16:49:46 2010 +0100

    s3: Fix handling of processes that died in g_lock

    g_lock_parse might have thrown away entries from the locks array
    because the processes were not around anymore. Don't store the
    orphaned entries.

-----------------------------------------------------------------------
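The wake-up strategy described in 725b365 and be919d6 has two cooperating
pieces: the unlocker pings only a handful of live pending waiters instead
of all of them, and every waiter additionally retries on its own roughly
once a minute, so a lost wake-up cannot stall the queue forever. The
following is a minimal standalone sketch of the capping idea only, not the
Samba implementation; the names (struct waiter, notify_waiter,
wake_some_waiters, MAX_WAKEUPS) are illustrative stand-ins for the real
g_lock record array, messaging_send(MSG_DBWRAP_G_LOCK_RETRY) and
process_exists(), and the exact cap arithmetic differs slightly from the
committed code.

#include <stdbool.h>
#include <stdio.h>

#define MAX_WAKEUPS 5

struct waiter {
	int pid;
	bool pending;   /* still waiting for the lock (vs. lock owner) */
	bool alive;     /* stand-in for process_exists() */
};

/* Stand-in for sending a retry message to one waiter. */
static bool notify_waiter(const struct waiter *w)
{
	printf("waking pid %d\n", w->pid);
	return true;
}

/*
 * On unlock, wake at most MAX_WAKEUPS live pending waiters.  Waiters
 * earlier in the array are woken first, which keeps acquisition roughly
 * FIFO; everyone not woken falls back to its periodic retry.
 */
static void wake_some_waiters(const struct waiter *waiters, int num_waiters)
{
	int i, num_wakeups = 0;

	for (i = 0; i < num_waiters; i++) {
		if (!waiters[i].pending || !waiters[i].alive) {
			continue;
		}
		if (notify_waiter(&waiters[i])) {
			num_wakeups += 1;
		}
		if (num_wakeups >= MAX_WAKEUPS) {
			break;
		}
	}
}

int main(void)
{
	struct waiter waiters[] = {
		{ 100, true, true }, { 101, true, false },
		{ 102, false, true }, { 103, true, true },
	};

	wake_some_waiters(waiters, 4);
	return 0;
}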
"(pending)" : "(owner)")); + if (((locks[i].lock_type & G_LOCK_PENDING) == 0) + && !process_exists(locks[i].pid)) { + + DEBUGADD(10, ("lock owner %s died -- discarding\n", + procid_str(talloc_tos(), + &locks[i].pid))); + + if (i < (num_locks-1)) { + locks[i] = locks[num_locks-1]; + } + num_locks -= 1; + } + } + + *plocks = locks; + *pnum_locks = num_locks; + return true; +} + +static void g_lock_cleanup(int *pnum_locks, struct g_lock_rec *locks) +{ + int i, num_locks; + + num_locks = *pnum_locks; + + DEBUG(10, ("g_lock_cleanup: %d locks\n", num_locks)); + + for (i=0; i<num_locks; i++) { if (process_exists(locks[i].pid)) { continue; } @@ -119,19 +147,18 @@ static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data, } num_locks -= 1; } - - *plocks = locks; *pnum_locks = num_locks; - return true; + return; } static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx, struct g_lock_rec *locks, - int num_locks, + int *pnum_locks, const struct server_id pid, enum g_lock_type lock_type) { struct g_lock_rec *result; + int num_locks = *pnum_locks; result = talloc_realloc(mem_ctx, locks, struct g_lock_rec, num_locks+1); @@ -141,6 +168,7 @@ static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx, result[num_locks].pid = pid; result[num_locks].lock_type = lock_type; + *pnum_locks += 1; return result; } @@ -221,7 +249,7 @@ again: if (our_index == -1) { /* First round, add ourself */ - locks = g_lock_addrec(talloc_tos(), locks, num_locks, + locks = g_lock_addrec(talloc_tos(), locks, &num_locks, self, lock_type); if (locks == NULL) { DEBUG(10, ("g_lock_addrec failed\n")); @@ -237,7 +265,14 @@ again: locks[our_index].lock_type = lock_type; } - data = make_tdb_data((uint8_t *)locks, talloc_get_size(locks)); + if (NT_STATUS_IS_OK(status) && ((lock_type & G_LOCK_PENDING) == 0)) { + /* + * Walk through the list of locks, search for dead entries + */ + g_lock_cleanup(&num_locks, locks); + } + + data = make_tdb_data((uint8_t *)locks, num_locks * sizeof(*locks)); store_status = rec->store(rec, data, 0); if (!NT_STATUS_IS_OK(store_status)) { DEBUG(1, ("rec->store failed: %s\n", @@ -263,7 +298,6 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, NTSTATUS status; bool retry = false; struct timeval timeout_end; - struct timeval timeout_remaining; struct timeval time_now; DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type, @@ -304,6 +338,7 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, fd_set *r_fds = NULL; int max_fd = 0; int ret; + struct timeval select_timeout; status = g_lock_trylock(ctx, name, lock_type); if (NT_STATUS_IS_OK(status)) { @@ -360,12 +395,10 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, } #endif - time_now = timeval_current(); - timeout_remaining = timeval_until(&time_now, &timeout_end); + select_timeout = timeval_set(60, 0); ret = sys_select(max_fd + 1, r_fds, NULL, NULL, - &timeout_remaining); - + &select_timeout); if (ret == -1) { if (errno != EINTR) { DEBUG(1, ("error calling select: %s\n", @@ -386,7 +419,7 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, break; } else { DEBUG(10, ("select returned 0 but timeout not " - "not expired: strange - retrying\n")); + "not expired, retrying\n")); } } else if (ret != 1) { DEBUG(1, ("invalid return code of select: %d\n", ret)); @@ -494,14 +527,26 @@ static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name, goto done; } + TALLOC_FREE(rec); + if ((lock_type & G_LOCK_PENDING) == 0) { + int num_wakeups = 0; + /* - * We've been the lock holder. 
Changeset truncated at 500 lines:

diff --git a/source3/lib/g_lock.c b/source3/lib/g_lock.c
index 26b079d..add670c 100644
--- a/source3/lib/g_lock.c
+++ b/source3/lib/g_lock.c
@@ -108,6 +108,34 @@ static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
 			      (locks[i].lock_type & G_LOCK_PENDING) ?
 			      "(pending)" : "(owner)"));
 
+		if (((locks[i].lock_type & G_LOCK_PENDING) == 0)
+		    && !process_exists(locks[i].pid)) {
+
+			DEBUGADD(10, ("lock owner %s died -- discarding\n",
+				      procid_str(talloc_tos(),
+						 &locks[i].pid)));
+
+			if (i < (num_locks-1)) {
+				locks[i] = locks[num_locks-1];
+			}
+			num_locks -= 1;
+		}
+	}
+
+	*plocks = locks;
+	*pnum_locks = num_locks;
+	return true;
+}
+
+static void g_lock_cleanup(int *pnum_locks, struct g_lock_rec *locks)
+{
+	int i, num_locks;
+
+	num_locks = *pnum_locks;
+
+	DEBUG(10, ("g_lock_cleanup: %d locks\n", num_locks));
+
+	for (i=0; i<num_locks; i++) {
 		if (process_exists(locks[i].pid)) {
 			continue;
 		}
@@ -119,19 +147,18 @@ static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
 		}
 		num_locks -= 1;
 	}
-
-	*plocks = locks;
 	*pnum_locks = num_locks;
-	return true;
+	return;
 }
 
 static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx,
 					struct g_lock_rec *locks,
-					int num_locks,
+					int *pnum_locks,
 					const struct server_id pid,
 					enum g_lock_type lock_type)
 {
 	struct g_lock_rec *result;
+	int num_locks = *pnum_locks;
 
 	result = talloc_realloc(mem_ctx, locks, struct g_lock_rec,
 				num_locks+1);
@@ -141,6 +168,7 @@ static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx,
 
 	result[num_locks].pid = pid;
 	result[num_locks].lock_type = lock_type;
+	*pnum_locks += 1;
 	return result;
 }
 
@@ -221,7 +249,7 @@ again:
 	if (our_index == -1) {
 		/* First round, add ourself */
 
-		locks = g_lock_addrec(talloc_tos(), locks, num_locks,
+		locks = g_lock_addrec(talloc_tos(), locks, &num_locks,
 				      self, lock_type);
 		if (locks == NULL) {
 			DEBUG(10, ("g_lock_addrec failed\n"));
@@ -237,7 +265,14 @@ again:
 		locks[our_index].lock_type = lock_type;
 	}
 
-	data = make_tdb_data((uint8_t *)locks, talloc_get_size(locks));
+	if (NT_STATUS_IS_OK(status) && ((lock_type & G_LOCK_PENDING) == 0)) {
+		/*
+		 * Walk through the list of locks, search for dead entries
+		 */
+		g_lock_cleanup(&num_locks, locks);
+	}
+
+	data = make_tdb_data((uint8_t *)locks, num_locks * sizeof(*locks));
 	store_status = rec->store(rec, data, 0);
 	if (!NT_STATUS_IS_OK(store_status)) {
 		DEBUG(1, ("rec->store failed: %s\n",
@@ -263,7 +298,6 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
 	NTSTATUS status;
 	bool retry = false;
 	struct timeval timeout_end;
-	struct timeval timeout_remaining;
 	struct timeval time_now;
 
 	DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type,
@@ -304,6 +338,7 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
 		fd_set *r_fds = NULL;
 		int max_fd = 0;
 		int ret;
+		struct timeval select_timeout;
 
 		status = g_lock_trylock(ctx, name, lock_type);
 		if (NT_STATUS_IS_OK(status)) {
@@ -360,12 +395,10 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
 		}
 #endif
 
-		time_now = timeval_current();
-		timeout_remaining = timeval_until(&time_now, &timeout_end);
+		select_timeout = timeval_set(60, 0);
 
 		ret = sys_select(max_fd + 1, r_fds, NULL, NULL,
-				 &timeout_remaining);
-
+				 &select_timeout);
 		if (ret == -1) {
 			if (errno != EINTR) {
 				DEBUG(1, ("error calling select: %s\n",
@@ -386,7 +419,7 @@ NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
 				break;
 			} else {
 				DEBUG(10, ("select returned 0 but timeout not "
-					   "not expired: strange - retrying\n"));
+					   "not expired, retrying\n"));
 			}
 		} else if (ret != 1) {
 			DEBUG(1, ("invalid return code of select: %d\n", ret));
@@ -494,14 +527,26 @@ static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
 		goto done;
 	}
 
+	TALLOC_FREE(rec);
+
 	if ((lock_type & G_LOCK_PENDING) == 0) {
+		int num_wakeups = 0;
+
 		/*
-		 * We've been the lock holder. Tell all others to retry.
+		 * We've been the lock holder. Others to retry. Don't
+		 * tell all others to avoid a thundering herd. In case
+		 * this leads to a complete stall because we miss some
+		 * processes, the loop in g_lock_lock tries at least
+		 * once a minute.
 		 */
+
 		for (i=0; i<num_locks; i++) {
 			if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
 				continue;
 			}
+			if (!process_exists(locks[i].pid)) {
+				continue;
+			}
 
 			/*
 			 * Ping all waiters to retry
@@ -514,13 +559,22 @@ static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
 					  procid_str(talloc_tos(),
 						     &locks[i].pid),
 					  nt_errstr(status)));
+			} else {
+				num_wakeups += 1;
+			}
+			if (num_wakeups > 5) {
+				break;
 			}
 		}
 	}
  done:
+	/*
+	 * For the error path, TALLOC_FREE(rec) as well. In the good
+	 * path we have already freed it.
+	 */
+	TALLOC_FREE(rec);
 	TALLOC_FREE(locks);
-	TALLOC_FREE(rec);
 
 	return status;
 }


-- 
Samba Shared Repository