The branch, master has been updated via 77d4e07 tdb: version 1.3.13 via 3607826 tdb: Improve debugging in _tdb_transaction_start via 1148e8f tdb: Improve debugging when the allrecord lock fails to upgrade via 19b193e tdb: runtime check for robust mutexes may hang in threaded programs from 5701880 notify: Fix ordering of events in notifyd
https://git.samba.org/?p=samba.git;a=shortlog;h=master - Log ----------------------------------------------------------------- commit 77d4e07ef3a0b9d7c2b1c660c8ac770c07120173 Author: Stefan Metzmacher <me...@samba.org> Date: Tue Apr 11 17:27:33 2017 +0200 tdb: version 1.3.13 * documentation for the tdbbackup -n option * correctly upgrade F_RDLCK to F_WRLCK locks * tdbtool: Add "storehex" command * fix robust mutex detection in threaded applications (bug #12593) * improve debugging of transaction lock failures Signed-off-by: Stefan Metzmacher <me...@samba.org> Reviewed-by: Andreas Schneider <a...@samba.org> Autobuild-User(master): Stefan Metzmacher <me...@samba.org> Autobuild-Date(master): Thu Apr 27 18:50:10 CEST 2017 on sn-devel-144 commit 36078263344fbad348fe21da757976ecd6d55be6 Author: Andrew Bartlett <abart...@samba.org> Date: Fri Mar 31 17:35:06 2017 +1300 tdb: Improve debugging in _tdb_transaction_start Signed-off-by: Andrew Bartlett <abart...@samba.org> Reviewed-by: Stefan Metzmacher <me...@samba.org> Reviewed-by: Andreas Schneider <a...@samba.org> commit 1148e8f0408b62b0417daf8e2727cdaf7cffed09 Author: Andrew Bartlett <abart...@samba.org> Date: Thu Mar 30 19:11:06 2017 +1300 tdb: Improve debugging when the allrecord lock fails to upgrade Pair-Programmed-With: Stefan Metzmacher <me...@samba.org> Signed-off-by: Andrew Bartlett <abart...@samba.org> Signed-off-by: Stefan Metzmacher <me...@samba.org> Reviewed-by: Andreas Schneider <a...@samba.org> commit 19b193ebc974efdebdf347143938b5d053e67051 Author: Ralph Boehme <s...@samba.org> Date: Tue Mar 14 14:24:18 2017 +0100 tdb: runtime check for robust mutexes may hang in threaded programs The current runtime check for robust mutexes in tdb_runtime_check_for_robust_mutexes() is not thread-safe. When called in a multi-threaded program where any another thread doesn't have SIGCHLD blocked, we may end up hung in sigsuspend() waiting for a SIGCHLD of a child procecss and the signal was delivered to another thread. Revert to the previous behaviour of waiting for the child instead of waiting for the SIGCHLD signal. Ensure the pid we wait for is not reset to -1 in a toctou race with the signal handler. Check whether waitpid() returns ECHILD which can happen if the signal handler is run by more then one thread in parallel (yes, this can happen) or if tdb_robust_mutex_wait_for_child() and the signal handler are racing. Bug: https://bugzilla.samba.org/show_bug.cgi?id=12593 Pair-programmed-with: Stefan Metzmacher <me...@samba.org> Signed-off-by: Ralph Boehme <s...@samba.org> Signed-off-by: Stefan Metzmacher <me...@samba.org> Reviewed-by: Andreas Schneider <a...@samba.org> ----------------------------------------------------------------------- Summary of changes: lib/tdb/ABI/{tdb-1.3.11.sigs => tdb-1.3.13.sigs} | 0 lib/tdb/common/lock.c | 2 + lib/tdb/common/mutex.c | 116 ++++++++++++++--------- lib/tdb/common/transaction.c | 9 +- lib/tdb/wscript | 2 +- 5 files changed, 81 insertions(+), 48 deletions(-) copy lib/tdb/ABI/{tdb-1.3.11.sigs => tdb-1.3.13.sigs} (100%) Changeset truncated at 500 lines: diff --git a/lib/tdb/ABI/tdb-1.3.11.sigs b/lib/tdb/ABI/tdb-1.3.13.sigs similarity index 100% copy from lib/tdb/ABI/tdb-1.3.11.sigs copy to lib/tdb/ABI/tdb-1.3.13.sigs diff --git a/lib/tdb/common/lock.c b/lib/tdb/common/lock.c index 4ad70cf..e330201 100644 --- a/lib/tdb/common/lock.c +++ b/lib/tdb/common/lock.c @@ -257,12 +257,14 @@ int tdb_allrecord_upgrade(struct tdb_context *tdb) TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_allrecord_upgrade failed: count %u too high\n", tdb->allrecord_lock.count)); + tdb->ecode = TDB_ERR_LOCK; return -1; } if (tdb->allrecord_lock.off != 1) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_allrecord_upgrade failed: already upgraded?\n")); + tdb->ecode = TDB_ERR_LOCK; return -1; } diff --git a/lib/tdb/common/mutex.c b/lib/tdb/common/mutex.c index cac3916..8a122d5 100644 --- a/lib/tdb/common/mutex.c +++ b/lib/tdb/common/mutex.c @@ -752,12 +752,23 @@ static bool tdb_robust_mutex_setup_sigchild(void (*handler)(int), static void tdb_robust_mutex_handler(int sig) { - if (tdb_robust_mutex_pid != -1) { + pid_t child_pid = tdb_robust_mutex_pid; + + if (child_pid != -1) { pid_t pid; - int status; - pid = waitpid(tdb_robust_mutex_pid, &status, WNOHANG); - if (pid == tdb_robust_mutex_pid) { + pid = waitpid(child_pid, NULL, WNOHANG); + if (pid == -1) { + switch (errno) { + case ECHILD: + tdb_robust_mutex_pid = -1; + return; + + default: + return; + } + } + if (pid == child_pid) { tdb_robust_mutex_pid = -1; return; } @@ -776,6 +787,44 @@ static void tdb_robust_mutex_handler(int sig) tdb_robust_mutext_old_handler(sig); } +static void tdb_robust_mutex_wait_for_child(pid_t *child_pid) +{ + int options = WNOHANG; + + if (*child_pid == -1) { + return; + } + + while (tdb_robust_mutex_pid > 0) { + pid_t pid; + + /* + * First we try with WNOHANG, as the process might not exist + * anymore. Once we've sent SIGKILL we block waiting for the + * exit. + */ + pid = waitpid(*child_pid, NULL, options); + if (pid == -1) { + if (errno == EINTR) { + continue; + } else if (errno == ECHILD) { + break; + } else { + abort(); + } + } + if (pid == *child_pid) { + break; + } + + kill(*child_pid, SIGKILL); + options = 0; + } + + tdb_robust_mutex_pid = -1; + *child_pid = -1; +} + _PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) { void *ptr = NULL; @@ -788,9 +837,8 @@ _PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) char c = 0; bool ok; static bool initialized; - sigset_t mask, old_mask, suspend_mask; + pid_t saved_child_pid = -1; bool cleanup_ma = false; - bool cleanup_sigmask = false; if (initialized) { return tdb_mutex_locking_cached; @@ -798,8 +846,6 @@ _PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) initialized = true; - sigemptyset(&suspend_mask); - ok = tdb_mutex_locking_supported(); if (!ok) { return false; @@ -845,26 +891,13 @@ _PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) } m = (pthread_mutex_t *)ptr; - /* - * Block SIGCHLD so we can atomically wait for it later with - * sigsuspend() - */ - sigemptyset(&mask); - sigaddset(&mask, SIGCHLD); - ret = pthread_sigmask(SIG_BLOCK, &mask, &old_mask); - if (ret != 0) { - goto cleanup; - } - cleanup_sigmask = true; - suspend_mask = old_mask; - sigdelset(&suspend_mask, SIGCHLD); - if (tdb_robust_mutex_setup_sigchild(tdb_robust_mutex_handler, &tdb_robust_mutext_old_handler) == false) { goto cleanup; } tdb_robust_mutex_pid = fork(); + saved_child_pid = tdb_robust_mutex_pid; if (tdb_robust_mutex_pid == 0) { size_t nwritten; close(pipe_down[1]); @@ -914,14 +947,7 @@ _PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) goto cleanup; } - while (tdb_robust_mutex_pid > 0) { - ret = sigsuspend(&suspend_mask); - if (ret != -1 || errno != EINTR) { - abort(); - } - } - tdb_robust_mutex_setup_sigchild(tdb_robust_mutext_old_handler, NULL); - tdb_robust_mutext_old_handler = SIG_ERR; + tdb_robust_mutex_wait_for_child(&saved_child_pid); ret = pthread_mutex_trylock(m); if (ret != EOWNERDEAD) { @@ -950,23 +976,21 @@ _PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) tdb_mutex_locking_cached = true; cleanup: - while (tdb_robust_mutex_pid > 0) { - kill(tdb_robust_mutex_pid, SIGKILL); - ret = sigsuspend(&suspend_mask); - if (ret != -1 || errno != EINTR) { - abort(); - } - } + /* + * Note that we don't reset the signal handler we just reset + * tdb_robust_mutex_pid to -1. This is ok as this code path is only + * called once per process. + * + * Leaving our signal handler avoids races with other threads potentialy + * setting up their SIGCHLD handlers. + * + * The worst thing that can happen is that the other newer signal + * handler will get the SIGCHLD signal for our child and/or reap the + * child with a wait() function. tdb_robust_mutex_wait_for_child() + * handles the case where waitpid returns ECHILD. + */ + tdb_robust_mutex_wait_for_child(&saved_child_pid); - if (tdb_robust_mutext_old_handler != SIG_ERR) { - tdb_robust_mutex_setup_sigchild(tdb_robust_mutext_old_handler, NULL); - } - if (cleanup_sigmask) { - ret = pthread_sigmask(SIG_SETMASK, &old_mask, NULL); - if (ret != 0) { - abort(); - } - } if (m != NULL) { pthread_mutex_destroy(m); } diff --git a/lib/tdb/common/transaction.c b/lib/tdb/common/transaction.c index 0dd057b..f1050a2 100644 --- a/lib/tdb/common/transaction.c +++ b/lib/tdb/common/transaction.c @@ -476,6 +476,10 @@ static int _tdb_transaction_start(struct tdb_context *tdb, SAFE_FREE(tdb->transaction); if ((lockflags & TDB_LOCK_WAIT) == 0) { tdb->ecode = TDB_ERR_NOLOCK; + } else { + TDB_LOG((tdb, TDB_DEBUG_ERROR, + "tdb_transaction_start: " + "failed to get transaction lock\n")); } return -1; } @@ -982,7 +986,10 @@ static int _tdb_transaction_prepare_commit(struct tdb_context *tdb) /* upgrade the main transaction lock region to a write lock */ if (tdb_allrecord_upgrade(tdb) == -1) { - TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: failed to upgrade hash locks\n")); + TDB_LOG((tdb, TDB_DEBUG_ERROR, + "tdb_transaction_prepare_commit: " + "failed to upgrade hash locks: %s\n", + tdb_errorstr(tdb))); _tdb_transaction_cancel(tdb); return -1; } diff --git a/lib/tdb/wscript b/lib/tdb/wscript index 693787c..987d78c 100644 --- a/lib/tdb/wscript +++ b/lib/tdb/wscript @@ -1,7 +1,7 @@ #!/usr/bin/env python APPNAME = 'tdb' -VERSION = '1.3.12' +VERSION = '1.3.13' blddir = 'bin' -- Samba Shared Repository