This is an automated email from the ASF dual-hosted git repository.
wwbmmm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/brpc.git
The following commit(s) were added to refs/heads/master by this push:
new f4e00acc Support detection of mutex deadlock caused by double lock
(#2765)
f4e00acc is described below
commit f4e00acc30b8824ba7d43c215a50c0f54d682ea0
Author: Bright Chen <[email protected]>
AuthorDate: Sun Oct 13 20:37:53 2024 +0800
Support detection of mutex deadlock caused by double lock (#2765)
* Support detection of bthread mutex and FastMutex deadlock caused by
double lock
* Support detection of pthread mutex deadlock caused by double lock
---
.github/workflows/ci-linux.yml | 12 +-
BUILD.bazel | 3 +
CMakeLists.txt | 8 +-
bazel/config/BUILD.bazel | 6 +
config_brpc.sh | 6 +-
src/bthread/mutex.cpp | 280 ++++++++++++++++++++++++++++++++++++----
src/bthread/mutex.h | 9 +-
src/bthread/types.h | 15 ++-
test/bthread_mutex_unittest.cpp | 5 +
9 files changed, 308 insertions(+), 36 deletions(-)
diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml
index 99c0bd1b..14961ebb 100644
--- a/.github/workflows/ci-linux.yml
+++ b/.github/workflows/ci-linux.yml
@@ -61,7 +61,7 @@ jobs:
- uses: ./.github/actions/install-all-dependences
- uses: ./.github/actions/init-make-config
with:
- options: --cc=gcc --cxx=g++ --with-thrift --with-glog --with-rdma
--with-debug-bthread-sche-safety
+ options: --cc=gcc --cxx=g++ --with-thrift --with-glog --with-rdma
--with-debug-bthread-sche-safety --with-debug-lock
- name: compile
run: |
make -j ${{env.proc_num}}
@@ -76,7 +76,7 @@ jobs:
export CC=gcc && export CXX=g++
mkdir build
cd build
- cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON
-DWITH_RDMA=ON -DWITH_DEBUG_BTHREAD_SCHE_SAFETY=ON ..
+ cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON
-DWITH_RDMA=ON -DWITH_DEBUG_BTHREAD_SCHE_SAFETY=ON -DWITH_DEBUG_LOCK=ON ..
- name: compile
run: |
cd build
@@ -86,7 +86,7 @@ jobs:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- - run: bazel test --verbose_failures --define with_mesalink=false --define
with_glog=true --define with_thrift=true --define
with_debug_bthread_sche_safety=true -- //... -//example/...
+ - run: bazel test --verbose_failures --define with_mesalink=false --define
with_glog=true --define with_thrift=true --define
with_debug_bthread_sche_safety=true --define with_debug_lock=true -- //...
-//example/...
clang-compile-with-make:
runs-on: ubuntu-20.04
@@ -135,7 +135,7 @@ jobs:
- uses: ./.github/actions/install-all-dependences
- uses: ./.github/actions/init-make-config
with:
- options: --cc=clang --cxx=clang++ --with-thrift --with-glog
--with-rdma --with-debug-bthread-sche-safety
+ options: --cc=clang --cxx=clang++ --with-thrift --with-glog
--with-rdma --with-debug-bthread-sche-safety --with-debug-lock
- name: compile
run: |
make -j ${{env.proc_num}}
@@ -150,7 +150,7 @@ jobs:
export CC=clang && export CXX=clang++
mkdir build
cd build
- cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON
-DWITH_RDMA=ON -DWITH_DEBUG_BTHREAD_SCHE_SAFETY=ON ..
+ cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON
-DWITH_RDMA=ON -DWITH_DEBUG_BTHREAD_SCHE_SAFETY=ON -DWITH_DEBUG_LOCK=ON ..
- name: compile
run: |
cd build
@@ -160,7 +160,7 @@ jobs:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- - run: bazel build --verbose_failures --action_env=CC=clang-12 --define
with_mesalink=false --define with_glog=true --define with_thrift=true --define
with_debug_bthread_sche_safety=true -- //... -//example/...
+ - run: bazel build --verbose_failures --action_env=CC=clang-12 --define
with_mesalink=false --define with_glog=true --define with_thrift=true --define
with_debug_bthread_sche_safety=true --define with_debug_lock=true -- //...
-//example/...
clang-unittest:
runs-on: ubuntu-20.04
diff --git a/BUILD.bazel b/BUILD.bazel
index 19d04a93..0e2e66ed 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -48,6 +48,9 @@ COPTS = [
}) + select({
"//bazel/config:brpc_with_debug_bthread_sche_safety":
["-DBRPC_DEBUG_BTHREAD_SCHE_SAFETY=1"],
"//conditions:default": ["-DBRPC_DEBUG_BTHREAD_SCHE_SAFETY=0"],
+}) + select({
+ "//bazel/config:brpc_with_debug_lock": ["-DBRPC_DEBUG_LOCK=1"],
+ "//conditions:default": ["-DBRPC_DEBUG_LOCK=0"],
})
LINKOPTS = [
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c8fa7716..71c611e1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,6 +27,7 @@ option(WITH_THRIFT "With thrift framed protocol supported"
OFF)
option(WITH_SNAPPY "With snappy" OFF)
option(WITH_RDMA "With RDMA" OFF)
option(WITH_DEBUG_BTHREAD_SCHE_SAFETY "With debugging bthread sche safety" OFF)
+option(WITH_DEBUG_LOCK "With debugging lock" OFF)
option(BUILD_UNIT_TESTS "Whether to build unit tests" OFF)
option(BUILD_FUZZ_TESTS "Whether to build fuzz tests" OFF)
option(BUILD_BRPC_TOOLS "Whether to build brpc tools" ON)
@@ -67,6 +68,11 @@ if(WITH_DEBUG_SYMBOLS)
set(DEBUG_SYMBOL "-g")
endif()
+set(WITH_DEBUG_LOCK_VAL "0")
+if(WITH_DEBUG_LOCK)
+ set(WITH_DEBUG_LOCK_VAL "1")
+endif()
+
if(WITH_THRIFT)
set(THRIFT_CPP_FLAG "-DENABLE_THRIFT_FRAMED_PROTOCOL")
find_library(THRIFT_LIB NAMES thrift)
@@ -123,7 +129,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -Wno-deprecated-declarations
-Wno-inconsistent-missing-override")
endif()
-set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} ${DEFINE_CLOCK_GETTIME}
-DBRPC_WITH_GLOG=${WITH_GLOG_VAL} -DBRPC_WITH_RDMA=${WITH_RDMA_VAL}
-DGFLAGS_NS=${GFLAGS_NS}
-DBRPC_DEBUG_BTHREAD_SCHE_SAFETY=${WITH_DEBUG_BTHREAD_SCHE_SAFETY_VAL}")
+set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} ${DEFINE_CLOCK_GETTIME}
-DBRPC_WITH_GLOG=${WITH_GLOG_VAL} -DBRPC_WITH_RDMA=${WITH_RDMA_VAL}
-DGFLAGS_NS=${GFLAGS_NS}
-DBRPC_DEBUG_BTHREAD_SCHE_SAFETY=${WITH_DEBUG_BTHREAD_SCHE_SAFETY_VAL}
-DBRPC_DEBUG_LOCK=${WITH_DEBUG_LOCK_VAL}")
if(WITH_MESALINK)
set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -DUSE_MESALINK")
endif()
diff --git a/bazel/config/BUILD.bazel b/bazel/config/BUILD.bazel
index d7c6d533..10965747 100644
--- a/bazel/config/BUILD.bazel
+++ b/bazel/config/BUILD.bazel
@@ -114,4 +114,10 @@ config_setting(
name = "brpc_with_debug_bthread_sche_safety",
define_values = {"with_debug_bthread_sche_safety": "true"},
visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "brpc_with_debug_lock",
+ define_values = {"with_debug_lock": "true"},
+ visibility = ["//visibility:public"],
)
\ No newline at end of file
diff --git a/config_brpc.sh b/config_brpc.sh
index 9b6b188c..fa607292 100755
--- a/config_brpc.sh
+++ b/config_brpc.sh
@@ -38,13 +38,14 @@ else
LDD=ldd
fi
-TEMP=`getopt -o v: --long
headers:,libs:,cc:,cxx:,with-glog,with-thrift,with-rdma,with-mesalink,with-debug-bthread-sche-safety,nodebugsymbols
-n 'config_brpc' -- "$@"`
+TEMP=`getopt -o v: --long
headers:,libs:,cc:,cxx:,with-glog,with-thrift,with-rdma,with-mesalink,with-debug-bthread-sche-safety,with-debug-lock,nodebugsymbols
-n 'config_brpc' -- "$@"`
WITH_GLOG=0
WITH_THRIFT=0
WITH_RDMA=0
WITH_MESALINK=0
BRPC_DEBUG_BTHREAD_SCHE_SAFETY=0
DEBUGSYMBOLS=-g
+BRPC_DEBUG_LOCK=0
if [ $? != 0 ] ; then >&2 $ECHO "Terminating..."; exit 1 ; fi
@@ -69,6 +70,7 @@ while true; do
--with-rdma) WITH_RDMA=1; shift 1 ;;
--with-mesalink) WITH_MESALINK=1; shift 1 ;;
--with-debug-bthread-sche-safety ) BRPC_DEBUG_BTHREAD_SCHE_SAFETY=1;
shift 1 ;;
+ --with-debug-lock ) BRPC_DEBUG_LOCK=1; shift 1 ;;
--nodebugsymbols ) DEBUGSYMBOLS=; shift 1 ;;
-- ) shift; break ;;
* ) break ;;
@@ -409,7 +411,7 @@ append_to_output "STATIC_LINKINGS=$STATIC_LINKINGS"
append_to_output "DYNAMIC_LINKINGS=$DYNAMIC_LINKINGS"
# CPP means C PreProcessing, not C PlusPlus
-CPPFLAGS="-DBRPC_WITH_GLOG=$WITH_GLOG -DGFLAGS_NS=$GFLAGS_NS
-DBRPC_DEBUG_BTHREAD_SCHE_SAFETY=$BRPC_DEBUG_BTHREAD_SCHE_SAFETY"
+CPPFLAGS="-DBRPC_WITH_GLOG=$WITH_GLOG -DGFLAGS_NS=$GFLAGS_NS
-DBRPC_DEBUG_BTHREAD_SCHE_SAFETY=$BRPC_DEBUG_BTHREAD_SCHE_SAFETY
-DBRPC_DEBUG_LOCK=$BRPC_DEBUG_LOCK"
# Avoid over-optimizations of TLS variables by GCC>=4.8
# See: https://github.com/apache/brpc/issues/1693
diff --git a/src/bthread/mutex.cpp b/src/bthread/mutex.cpp
index 30872561..fa5f55b5 100644
--- a/src/bthread/mutex.cpp
+++ b/src/bthread/mutex.cpp
@@ -278,7 +278,7 @@ static pthread_mutex_t g_cp_mutex =
PTHREAD_MUTEX_INITIALIZER;
// The map storing information for profiling pthread_mutex. Different from
// bthread_mutex, we can't save stuff into pthread_mutex, we neither can
// save the info in TLS reliably, since a mutex can be unlocked in a different
-// thread from the one locked (although rare)
+// thread from the one locked (although rare, undefined behavior)
// This map must be very fast, since it's accessed inside the lock.
// Layout of the map:
// * Align each entry by cacheline so that different threads do not collide.
@@ -383,10 +383,15 @@ void
make_contention_site_invalid(bthread_contention_site_t* cs) {
// First call to sys_pthread_mutex_lock sets sys_pthread_mutex_lock to the
// real function so that next calls go to the real function directly. This
// technique avoids calling pthread_once each time.
+typedef int (*MutexInitOp)(pthread_mutex_t*, const pthread_mutexattr_t*);
typedef int (*MutexOp)(pthread_mutex_t*);
+int first_sys_pthread_mutex_init(pthread_mutex_t* mutex, const
pthread_mutexattr_t* mutexattr);
+int first_sys_pthread_mutex_destroy(pthread_mutex_t* mutex);
int first_sys_pthread_mutex_lock(pthread_mutex_t* mutex);
int first_sys_pthread_mutex_trylock(pthread_mutex_t* mutex);
int first_sys_pthread_mutex_unlock(pthread_mutex_t* mutex);
+static MutexInitOp sys_pthread_mutex_init = first_sys_pthread_mutex_init;
+static MutexOp sys_pthread_mutex_destroy = first_sys_pthread_mutex_destroy;
static MutexOp sys_pthread_mutex_lock = first_sys_pthread_mutex_lock;
static MutexOp sys_pthread_mutex_trylock = first_sys_pthread_mutex_trylock;
static MutexOp sys_pthread_mutex_unlock = first_sys_pthread_mutex_unlock;
@@ -438,6 +443,10 @@ static void init_sys_mutex_lock() {
// TODO: may need dlvsym when GLIBC has multiple versions of a same symbol.
//
http://blog.fesnel.com/blog/2009/08/25/preloading-with-multiple-symbol-versions
if (_dl_sym) {
+ sys_pthread_mutex_init = (MutexInitOp)_dl_sym(
+ RTLD_NEXT, "pthread_mutex_init", (void*)init_sys_mutex_lock);
+ sys_pthread_mutex_destroy = (MutexOp)_dl_sym(
+ RTLD_NEXT, "pthread_mutex_destroy", (void*)init_sys_mutex_lock);
sys_pthread_mutex_lock = (MutexOp)_dl_sym(
RTLD_NEXT, "pthread_mutex_lock", (void*)init_sys_mutex_lock);
sys_pthread_mutex_unlock = (MutexOp)_dl_sym(
@@ -450,6 +459,8 @@ static void init_sys_mutex_lock() {
#endif // HAS_PTHREAD_MUTEX_TIMEDLOCK
} else {
// _dl_sym may be undefined reference in some system, fallback to dlsym
+ sys_pthread_mutex_init = (MutexInitOp)dlsym(RTLD_NEXT,
"pthread_mutex_init");
+ sys_pthread_mutex_destroy = (MutexOp)dlsym(RTLD_NEXT,
"pthread_mutex_destroy");
sys_pthread_mutex_lock = (MutexOp)dlsym(RTLD_NEXT,
"pthread_mutex_lock");
sys_pthread_mutex_unlock = (MutexOp)dlsym(RTLD_NEXT,
"pthread_mutex_unlock");
sys_pthread_mutex_trylock = (MutexOp)dlsym(RTLD_NEXT,
"pthread_mutex_trylock");
@@ -459,6 +470,8 @@ static void init_sys_mutex_lock() {
}
#elif defined(OS_MACOSX)
// TODO: look workaround for dlsym on mac
+ sys_pthread_mutex_init = (MutexInitOp)dlsym(RTLD_NEXT,
"pthread_mutex_init");
+ sys_pthread_mutex_destroy = (MutexOp)dlsym(RTLD_NEXT,
"pthread_mutex_destroy");
sys_pthread_mutex_lock = (MutexOp)dlsym(RTLD_NEXT, "pthread_mutex_lock");
sys_pthread_mutex_trylock = (MutexOp)dlsym(RTLD_NEXT,
"pthread_mutex_trylock");
sys_pthread_mutex_unlock = (MutexOp)dlsym(RTLD_NEXT,
"pthread_mutex_unlock");
@@ -468,6 +481,16 @@ static void init_sys_mutex_lock() {
// Make sure pthread functions are ready before main().
const int ALLOW_UNUSED dummy = pthread_once(&init_sys_mutex_lock_once,
init_sys_mutex_lock);
+int first_sys_pthread_mutex_init(pthread_mutex_t* mutex, const
pthread_mutexattr_t* mutexattr) {
+ pthread_once(&init_sys_mutex_lock_once, init_sys_mutex_lock);
+ return sys_pthread_mutex_init(mutex, mutexattr);
+}
+
+int first_sys_pthread_mutex_destroy(pthread_mutex_t* mutex) {
+ pthread_once(&init_sys_mutex_lock_once, init_sys_mutex_lock);
+ return sys_pthread_mutex_destroy(mutex);
+}
+
int first_sys_pthread_mutex_lock(pthread_mutex_t* mutex) {
pthread_once(&init_sys_mutex_lock_once, init_sys_mutex_lock);
return sys_pthread_mutex_lock(mutex);
@@ -640,14 +663,158 @@ void submit_contention(const bthread_contention_site_t&
csite, int64_t now_ns) {
tls_warn_up = true;
}
+#if BRPC_DEBUG_LOCK
+#define MUTEX_RESET_OWNER_COMMON(owner)
\
+ ((butil::atomic<bool>*)&(owner).hold)
\
+ ->store(false, butil::memory_order_relaxed)
+
+#define PTHREAD_MUTEX_SET_OWNER(owner)
\
+ owner.id = pthread_numeric_id();
\
+ ((butil::atomic<bool>*)&(owner).hold)
\
+ ->store(true, butil::memory_order_release)
+
+// Check if the mutex has been locked by the current thread.
+// Double lock on the same thread will cause deadlock.
+#define PTHREAD_MUTEX_CHECK_OWNER(owner)
\
+ bool hold = ((butil::atomic<bool>*)&(owner).hold)
\
+ ->load(butil::memory_order_acquire);
\
+ if (hold && (owner).id == pthread_numeric_id()) {
\
+ butil::debug::StackTrace trace(true);
\
+ LOG(ERROR) << "Detected deadlock caused by double lock of
FastPthreadMutex:" \
+ << std::endl << trace.ToString();
\
+ }
+#else
+#define MUTEX_RESET_OWNER_COMMON(owner) ((void)0)
+#define PTHREAD_MUTEX_SET_OWNER(owner) ((void)0)
+#define PTHREAD_MUTEX_CHECK_OWNER(owner) ((void)0)
+#endif // BRPC_DEBUG_LOCK
+
namespace internal {
+
#ifndef NO_PTHREAD_MUTEX_HOOK
+
+#if BRPC_DEBUG_LOCK
+struct BAIDU_CACHELINE_ALIGNMENT MutexOwnerMapEntry {
+ butil::static_atomic<bool> valid;
+ pthread_mutex_t* mutex;
+ mutex_owner_t owner;
+};
+
+// The map storing owner information for pthread_mutex. Different from
+// bthread_mutex, we can't save stuff into pthread_mutex, we neither can
+// save the info in TLS reliably, since a mutex can be unlocked in a different
+// thread from the one locked (although rare).
+static MutexOwnerMapEntry g_mutex_owner_map[MUTEX_MAP_SIZE] = {}; //
zero-initialize
+
+static void InitMutexOwnerMapEntry(pthread_mutex_t* mutex,
+ const pthread_mutexattr_t* mutexattr) {
+ int type = PTHREAD_MUTEX_DEFAULT;
+ if (NULL != mutexattr) {
+ pthread_mutexattr_gettype(mutexattr, &type);
+ }
+ // Only normal mutexes are tracked.
+ if (type != PTHREAD_MUTEX_NORMAL) {
+ return;
+ }
+
+ // Fast path: If the hash entry is not used, use it.
+ MutexOwnerMapEntry& hash_entry =
+ g_mutex_owner_map[hash_mutex_ptr(mutex) & (MUTEX_MAP_SIZE - 1)];
+ if (!hash_entry.valid.exchange(true, butil::memory_order_relaxed)) {
+ MUTEX_RESET_OWNER_COMMON(hash_entry.owner);
+ return;
+ }
+
+ // Slow path: Find an unused entry.
+ for (auto& entry : g_mutex_owner_map) {
+ if (!entry.valid.exchange(true, butil::memory_order_relaxed)) {
+ MUTEX_RESET_OWNER_COMMON(entry.owner);
+ return;
+ }
+ }
+}
+
+static BUTIL_FORCE_INLINE
+MutexOwnerMapEntry* FindMutexOwnerMapEntry(pthread_mutex_t* mutex) {
+ if (NULL == mutex) {
+ return NULL;
+ }
+
+ // Fast path.
+ MutexOwnerMapEntry* hash_entry =
+ &g_mutex_owner_map[hash_mutex_ptr(mutex) & (MUTEX_MAP_SIZE - 1)];
+ if (hash_entry->valid.load(butil::memory_order_relaxed) &&
hash_entry->mutex == mutex) {
+ return hash_entry;
+ }
+ // Slow path.
+ for (auto& entry : g_mutex_owner_map) {
+ if (entry.valid.load(butil::memory_order_relaxed) && entry.mutex ==
mutex) {
+ return &entry;
+ }
+ }
+ return NULL;
+}
+
+static void DestroyMutexOwnerMapEntry(pthread_mutex_t* mutex) {
+ MutexOwnerMapEntry* entry = FindMutexOwnerMapEntry(mutex);
+ if (NULL != entry) {
+ entry->valid.store(false, butil::memory_order_relaxed);
+ }
+}
+
+#define INIT_MUTEX_OWNER_MAP_ENTRY(mutex, mutexattr) \
+ ::bthread::internal::InitMutexOwnerMapEntry(mutex, mutexattr)
+
+#define DESTROY_MUTEX_OWNER_MAP_ENTRY(mutex) \
+ ::bthread::internal::DestroyMutexOwnerMapEntry(mutex)
+
+#define FIND_SYS_PTHREAD_MUTEX_OWNER_MAP_ENTRY(mutex) \
+ MutexOwnerMapEntry* entry =
::bthread::internal::FindMutexOwnerMapEntry(mutex)
+
+#define SYS_PTHREAD_MUTEX_CHECK_OWNER \
+ if (NULL != entry) { \
+ PTHREAD_MUTEX_CHECK_OWNER(entry->owner); \
+ }
+
+#define SYS_PTHREAD_MUTEX_SET_OWNER \
+ if (NULL != entry) { \
+ PTHREAD_MUTEX_SET_OWNER(entry->owner); \
+ }
+
+#define SYS_PTHREAD_MUTEX_RESET_OWNER(mutex) \
+ FIND_SYS_PTHREAD_MUTEX_OWNER_MAP_ENTRY(mutex); \
+ if (NULL != entry) { \
+ MUTEX_RESET_OWNER_COMMON(entry->owner); \
+ }
+
+#else
+#define INIT_MUTEX_OWNER_MAP_ENTRY(mutex, mutexattr) ((void)0)
+#define DESTROY_MUTEX_OWNER_MAP_ENTRY(mutex) ((void)0)
+#define FIND_SYS_PTHREAD_MUTEX_OWNER_MAP_ENTRY(mutex) ((void)0)
+#define SYS_PTHREAD_MUTEX_CHECK_OWNER ((void)0)
+#define SYS_PTHREAD_MUTEX_SET_OWNER ((void)0)
+#define SYS_PTHREAD_MUTEX_RESET_OWNER(mutex) ((void)0)
+#endif // BRPC_DEBUG_LOCK
+
+
#if HAS_PTHREAD_MUTEX_TIMEDLOCK
BUTIL_FORCE_INLINE int pthread_mutex_lock_internal(pthread_mutex_t* mutex,
const struct timespec*
abstime) {
- int rc = NULL == abstime ?
- sys_pthread_mutex_lock(mutex) :
- sys_pthread_mutex_timedlock(mutex, abstime);
+ int rc = 0;
+ if (NULL == abstime) {
+ FIND_SYS_PTHREAD_MUTEX_OWNER_MAP_ENTRY(mutex);
+ SYS_PTHREAD_MUTEX_CHECK_OWNER;
+ rc = sys_pthread_mutex_lock(mutex);
+ if (0 == rc) {
+ SYS_PTHREAD_MUTEX_SET_OWNER;
+ }
+ } else {
+ rc = sys_pthread_mutex_timedlock(mutex, abstime);
+ if (0 == rc) {
+ FIND_SYS_PTHREAD_MUTEX_OWNER_MAP_ENTRY(mutex);
+ SYS_PTHREAD_MUTEX_SET_OWNER;
+ }
+ }
if (0 == rc) {
ADD_TLS_PTHREAD_LOCK_COUNT;
}
@@ -656,8 +823,11 @@ BUTIL_FORCE_INLINE int
pthread_mutex_lock_internal(pthread_mutex_t* mutex,
#else
BUTIL_FORCE_INLINE int pthread_mutex_lock_internal(pthread_mutex_t* mutex,
const struct timespec*/*
Not supported */) {
+ FIND_SYS_PTHREAD_MUTEX_OWNER_MAP_ENTRY(mutex);
+ SYS_PTHREAD_MUTEX_CHECK_OWNER;
int rc = sys_pthread_mutex_lock(mutex);
if (0 == rc) {
+ SYS_PTHREAD_MUTEX_SET_OWNER;
ADD_TLS_PTHREAD_LOCK_COUNT;
}
return rc;
@@ -667,12 +837,15 @@ BUTIL_FORCE_INLINE int
pthread_mutex_lock_internal(pthread_mutex_t* mutex,
BUTIL_FORCE_INLINE int pthread_mutex_trylock_internal(pthread_mutex_t* mutex) {
int rc = sys_pthread_mutex_trylock(mutex);
if (0 == rc) {
+ FIND_SYS_PTHREAD_MUTEX_OWNER_MAP_ENTRY(mutex);
+ SYS_PTHREAD_MUTEX_SET_OWNER;
ADD_TLS_PTHREAD_LOCK_COUNT;
}
return rc;
}
BUTIL_FORCE_INLINE int pthread_mutex_unlock_internal(pthread_mutex_t* mutex) {
+ SYS_PTHREAD_MUTEX_RESET_OWNER(mutex);
SUB_TLS_PTHREAD_LOCK_COUNT;
return sys_pthread_mutex_unlock(mutex);
}
@@ -838,9 +1011,51 @@ const MutexInternal MUTEX_LOCKED_RAW = {{1},{0},0};
BAIDU_CASSERT(sizeof(unsigned) == sizeof(MutexInternal),
sizeof_mutex_internal_must_equal_unsigned);
+#if BRPC_DEBUG_LOCK
+
+#define BTHREAD_MUTEX_SET_OWNER
\
+ do {
\
+ TaskGroup* task_group =
BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_task_group); \
+ if (NULL != task_group && !task_group->is_current_main_task()) {
\
+ m->owner.id = bthread_self();
\
+ } else {
\
+ m->owner.id = pthread_numeric_id();
\
+ }
\
+ ((butil::atomic<bool>*)&m->owner.hold)
\
+ ->store(true, butil::memory_order_release);
\
+ } while(false)
+
+// Check if the mutex has been locked by the current thread.
+// Double lock on the same thread will cause deadlock.
+#define BTHREAD_MUTEX_CHECK_OWNER
\
+ bool hold = ((butil::atomic<bool>*)&m->owner.hold)
\
+ ->load(butil::memory_order_acquire);
\
+ bool double_lock =
\
+ hold && (m->owner.id == bthread_self() || m->owner.id ==
pthread_numeric_id()); \
+ if (double_lock) {
\
+ butil::debug::StackTrace trace(true);
\
+ LOG(ERROR) << "Detected deadlock caused by double lock of
bthread_mutex_t:" \
+ << std::endl << trace.ToString();
\
+ }
+#else
+#define BTHREAD_MUTEX_SET_OWNER ((void)0)
+#define BTHREAD_MUTEX_CHECK_OWNER ((void)0)
+#endif // BRPC_DEBUG_LOCK
+
+inline int mutex_trylock_impl(bthread_mutex_t* m) {
+ MutexInternal* split = (MutexInternal*)m->butex;
+ if (!split->locked.exchange(1, butil::memory_order_acquire)) {
+ BTHREAD_MUTEX_SET_OWNER;
+ return 0;
+ }
+ return EBUSY;
+}
+
const int MAX_SPIN_ITER = 4;
-inline int mutex_lock_contended_impl(bthread_mutex_t* m, const struct
timespec* abstime) {
+inline int mutex_lock_contended_impl(bthread_mutex_t* __restrict m,
+ const struct timespec* __restrict
abstime) {
+ BTHREAD_MUTEX_CHECK_OWNER;
// When a bthread first contends for a lock, active spinning makes sense.
// Spin only few times and only if local `rq' is empty.
TaskGroup* g = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_task_group);
@@ -873,12 +1088,17 @@ inline int mutex_lock_contended_impl(bthread_mutex_t* m,
const struct timespec*
queue_lifo = true;
}
}
+ BTHREAD_MUTEX_SET_OWNER;
return 0;
}
#ifdef BTHREAD_USE_FAST_PTHREAD_MUTEX
namespace internal {
+FastPthreadMutex::FastPthreadMutex() : _futex(0) {
+ MUTEX_RESET_OWNER_COMMON(_owner);
+}
+
int FastPthreadMutex::lock_contended(const struct timespec* abstime) {
int64_t abstime_us = 0;
if (NULL != abstime) {
@@ -905,6 +1125,8 @@ int FastPthreadMutex::lock_contended(const struct
timespec* abstime) {
return errno;
}
}
+ PTHREAD_MUTEX_SET_OWNER(_owner);
+ ADD_TLS_PTHREAD_LOCK_COUNT;
return 0;
}
@@ -913,14 +1135,15 @@ void FastPthreadMutex::lock() {
return;
}
+ PTHREAD_MUTEX_CHECK_OWNER(_owner);
(void)lock_contended(NULL);
- ADD_TLS_PTHREAD_LOCK_COUNT;
}
bool FastPthreadMutex::try_lock() {
auto split = (bthread::MutexInternal*)&_futex;
bool lock = !split->locked.exchange(1, butil::memory_order_acquire);
if (lock) {
+ PTHREAD_MUTEX_SET_OWNER(_owner);
ADD_TLS_PTHREAD_LOCK_COUNT;
}
return lock;
@@ -930,15 +1153,12 @@ bool FastPthreadMutex::timed_lock(const struct timespec*
abstime) {
if (try_lock()) {
return true;
}
- int rc = lock_contended(abstime);
- if (rc == 0) {
- ADD_TLS_PTHREAD_LOCK_COUNT;
- }
- return rc == 0;
+ return 0 == lock_contended(abstime);
}
void FastPthreadMutex::unlock() {
SUB_TLS_PTHREAD_LOCK_COUNT;
+ MUTEX_RESET_OWNER_COMMON(_owner);
auto whole = (butil::atomic<unsigned>*)&_futex;
const unsigned prev = whole->exchange(0, butil::memory_order_release);
// CAUTION: the mutex may be destroyed, check comments before butex_create
@@ -971,6 +1191,7 @@ __BEGIN_DECLS
int bthread_mutex_init(bthread_mutex_t* __restrict m,
const bthread_mutexattr_t* __restrict attr) {
bthread::make_contention_site_invalid(&m->csite);
+ MUTEX_RESET_OWNER_COMMON(m->owner);
m->butex = bthread::butex_create_checked<unsigned>();
if (!m->butex) {
return ENOMEM;
@@ -986,11 +1207,7 @@ int bthread_mutex_destroy(bthread_mutex_t* m) {
}
int bthread_mutex_trylock(bthread_mutex_t* m) {
- bthread::MutexInternal* split = (bthread::MutexInternal*)m->butex;
- if (!split->locked.exchange(1, butil::memory_order_acquire)) {
- return 0;
- }
- return EBUSY;
+ return bthread::mutex_trylock_impl(m);
}
int bthread_mutex_lock_contended(bthread_mutex_t* m) {
@@ -999,8 +1216,7 @@ int bthread_mutex_lock_contended(bthread_mutex_t* m) {
static int bthread_mutex_lock_impl(bthread_mutex_t* __restrict m,
const struct timespec* __restrict abstime) {
- auto split = (bthread::MutexInternal*)m->butex;
- if (!split->locked.exchange(1, butil::memory_order_acquire)) {
+ if (0 == bthread::mutex_trylock_impl(m)) {
return 0;
}
// Don't sample when contention profiler is off.
@@ -1047,6 +1263,7 @@ int bthread_mutex_unlock(bthread_mutex_t* m) {
saved_csite = m->csite;
bthread::make_contention_site_invalid(&m->csite);
}
+ MUTEX_RESET_OWNER_COMMON(m->owner);
const unsigned prev = whole->exchange(0, butil::memory_order_release);
// CAUTION: the mutex may be destroyed, check comments before butex_create
if (prev == BTHREAD_MUTEX_LOCKED) {
@@ -1081,20 +1298,35 @@ int bthread_mutexattr_destroy(bthread_mutexattr_t*
attr) {
}
#ifndef NO_PTHREAD_MUTEX_HOOK
-int pthread_mutex_lock(pthread_mutex_t* __mutex) {
- return bthread::pthread_mutex_lock_impl(__mutex);
+
+int pthread_mutex_init(pthread_mutex_t * __restrict mutex,
+ const pthread_mutexattr_t* __restrict mutexattr) {
+ INIT_MUTEX_OWNER_MAP_ENTRY(mutex, mutexattr);
+ return bthread::sys_pthread_mutex_init(mutex, mutexattr);
+}
+
+int pthread_mutex_destroy(pthread_mutex_t* mutex) {
+ DESTROY_MUTEX_OWNER_MAP_ENTRY(mutex);
+ return bthread::sys_pthread_mutex_destroy(mutex);
}
-int pthread_mutex_trylock(pthread_mutex_t* __mutex) {
- return bthread::pthread_mutex_trylock_impl(__mutex);
+
+int pthread_mutex_lock(pthread_mutex_t* mutex) {
+ return bthread::pthread_mutex_lock_impl(mutex);
}
+
#if defined(OS_LINUX) && defined(OS_POSIX) && defined(__USE_XOPEN2K)
int pthread_mutex_timedlock(pthread_mutex_t *__restrict __mutex,
const struct timespec *__restrict
__abstime) {
return bthread::pthread_mutex_timedlock_impl(__mutex, __abstime);
}
#endif // OS_POSIX __USE_XOPEN2K
-int pthread_mutex_unlock(pthread_mutex_t* __mutex) {
- return bthread::pthread_mutex_unlock_impl(__mutex);
+
+int pthread_mutex_trylock(pthread_mutex_t* mutex) {
+ return bthread::pthread_mutex_trylock_impl(mutex);
+}
+
+int pthread_mutex_unlock(pthread_mutex_t* mutex) {
+ return bthread::pthread_mutex_unlock_impl(mutex);
}
#endif // NO_PTHREAD_MUTEX_HOOK
diff --git a/src/bthread/mutex.h b/src/bthread/mutex.h
index d05d753c..11c7eae8 100644
--- a/src/bthread/mutex.h
+++ b/src/bthread/mutex.h
@@ -35,6 +35,7 @@ extern int bthread_mutex_lock(bthread_mutex_t* mutex);
extern int bthread_mutex_timedlock(bthread_mutex_t* __restrict mutex,
const struct timespec* __restrict abstime);
extern int bthread_mutex_unlock(bthread_mutex_t* mutex);
+extern bthread_t bthread_self(void);
__END_DECLS
namespace bthread {
@@ -76,8 +77,7 @@ namespace internal {
#ifdef BTHREAD_USE_FAST_PTHREAD_MUTEX
class FastPthreadMutex {
public:
- FastPthreadMutex() : _futex(0) {}
- ~FastPthreadMutex() = default;
+ FastPthreadMutex();
void lock();
void unlock();
bool try_lock();
@@ -85,7 +85,12 @@ public:
private:
DISALLOW_COPY_AND_ASSIGN(FastPthreadMutex);
int lock_contended(const struct timespec* abstime);
+
unsigned _futex;
+ // Note: Owner detection of the mutex comes with average execution
+ // slowdown of about 50%., so it is only used for debugging and is
+ // only available when the macro `BRPC_DEBUG_LOCK' = 1.
+ mutex_owner_t _owner;
};
#else
typedef butil::Mutex FastPthreadMutex;
diff --git a/src/bthread/types.h b/src/bthread/types.h
index 0124fcbd..60f5c7fe 100644
--- a/src/bthread/types.h
+++ b/src/bthread/types.h
@@ -166,14 +166,27 @@ typedef struct {
size_t sampling_range;
} bthread_contention_site_t;
+struct mutex_owner_t {
+ bool hold;
+ uint64_t id;
+};
+
typedef struct bthread_mutex_t {
#if defined(__cplusplus)
- bthread_mutex_t() : butex(NULL), csite{} {}
+ bthread_mutex_t()
+ : butex(NULL), csite{}
+ , enable_csite(false)
+ , owner{false, 0} {}
+
DISALLOW_COPY_AND_ASSIGN(bthread_mutex_t);
#endif
unsigned* butex;
bthread_contention_site_t csite;
bool enable_csite;
+ // Note: Owner detection of the mutex comes with average execution
+ // slowdown of about 50%, so it is only used for debugging and is
+ // only available when the macro `BRPC_DEBUG_LOCK' = 1.
+ mutex_owner_t owner;
} bthread_mutex_t;
typedef struct {
diff --git a/test/bthread_mutex_unittest.cpp b/test/bthread_mutex_unittest.cpp
index b0802f9b..3163876c 100644
--- a/test/bthread_mutex_unittest.cpp
+++ b/test/bthread_mutex_unittest.cpp
@@ -230,6 +230,11 @@ TEST(MutexTest, performance) {
butil::Mutex base_mutex;
PerfTest(&base_mutex, (pthread_t*)NULL, thread_num, pthread_create,
pthread_join);
PerfTest(&base_mutex, (bthread_t*)NULL, thread_num,
bthread_start_background, bthread_join);
+
+ bthread::FastPthreadMutex fast_mutex;
+ PerfTest(&fast_mutex, (pthread_t*)NULL, thread_num, pthread_create,
pthread_join);
+ PerfTest(&fast_mutex, (bthread_t*)NULL, thread_num,
bthread_start_background, bthread_join);
+
bthread::Mutex bth_mutex;
PerfTest(&bth_mutex, (pthread_t*)NULL, thread_num, pthread_create,
pthread_join);
PerfTest(&bth_mutex, (bthread_t*)NULL, thread_num,
bthread_start_background, bthread_join);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]