This is an automated email from the ASF dual-hosted git repository.
wwbmmm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/brpc.git
The following commit(s) were added to refs/heads/master by this push:
new a18463f6 Support task tracer (#2851)
a18463f6 is described below
commit a18463f6c380331065efbf3eb954d6d379dd6d2d
Author: Bright Chen <[email protected]>
AuthorDate: Mon Jan 6 14:43:55 2025 +0800
Support task tracer (#2851)
* Support task tracer
* Opt signal trace
* Rename BRPC_VALIDATE_GFLAG to BUTIL_VALIDATE_GFLAG
* Update picture of document
---
.github/actions/init-ut-make-config/action.yml | 13 +
.github/actions/install-all-dependences/action.yml | 2 +-
.github/workflows/ci-linux.yml | 46 +-
CMakeLists.txt | 20 +
README_cn.md | 1 +
config_brpc.sh | 29 +-
docs/cn/bthread_tracer.md | 79 +++
docs/cn/getting_started.md | 8 +-
docs/en/getting_started.md | 8 +-
docs/images/bthread_status_model.svg | 3 +
docs/images/bthread_stb_model.svg | 3 +
src/brpc/builtin/bthreads_service.cpp | 18 +-
src/brpc/input_messenger.cpp | 3 +-
src/brpc/reloadable_flags.cpp | 26 +-
src/brpc/reloadable_flags.h | 47 +-
src/brpc/shared_object.h | 46 +-
src/bthread/bthread.cpp | 75 +++
src/bthread/bthread.h | 19 +-
src/bthread/butex.cpp | 33 +-
src/bthread/execution_queue.cpp | 4 +-
src/bthread/execution_queue_inl.h | 12 +-
src/bthread/task_control.cpp | 45 +-
src/bthread/task_control.h | 20 +-
src/bthread/task_group.cpp | 123 +++--
src/bthread/task_group.h | 17 +-
src/bthread/task_group_inl.h | 8 +-
src/bthread/task_meta.h | 57 ++-
src/bthread/task_tracer.cpp | 569 +++++++++++++++++++++
src/bthread/task_tracer.h | 143 ++++++
src/butil/debug/stack_trace.cc | 13 +-
src/butil/debug/stack_trace.h | 1 +
src/butil/debug/stack_trace_posix.cc | 22 +
src/butil/memory/scope_guard.h | 6 +-
src/butil/reloadable_flags.h | 75 +++
src/{brpc => butil}/shared_object.h | 10 +-
src/butil/time.h | 2 +-
test/brpc_builtin_service_unittest.cpp | 31 +-
test/bthread_unittest.cpp | 54 +-
38 files changed, 1439 insertions(+), 252 deletions(-)
diff --git a/.github/actions/init-ut-make-config/action.yml
b/.github/actions/init-ut-make-config/action.yml
new file mode 100644
index 00000000..b13800ed
--- /dev/null
+++ b/.github/actions/init-ut-make-config/action.yml
@@ -0,0 +1,13 @@
+inputs:
+ options:
+ description: extra options for config_brpc.sh
+ required: false
+runs:
+ using: "composite"
+ steps:
+ - run: sudo git clone https://github.com/libunwind/libunwind.git && cd
libunwind && sudo git checkout tags/v1.8.1 && sudo mkdir -p /libunwind && sudo
autoreconf -i && sudo CC=clang CXX=clang++ ./configure --prefix=/libunwind &&
sudo make -j ${{env.proc_num}} && sudo make install
+ shell: bash
+ - run: sudo apt-get update && sudo apt-get install -y libgtest-dev cmake
gdb libstdc++6-9-dbg && cd /usr/src/gtest && sudo cmake . && sudo make -j
${{env.proc_num}} && sudo mv lib/libgtest* /usr/lib/
+ shell: bash
+ - run: sh config_brpc.sh --headers="/libunwind/include /usr/include"
--libs="/libunwind/lib /usr/lib /usr/lib64" --nodebugsymbols ${{inputs.options}}
+ shell: bash
diff --git a/.github/actions/install-all-dependences/action.yml
b/.github/actions/install-all-dependences/action.yml
index f507aa40..8ff502f8 100644
--- a/.github/actions/install-all-dependences/action.yml
+++ b/.github/actions/install-all-dependences/action.yml
@@ -2,7 +2,7 @@ runs:
using: "composite"
steps:
- uses: ./.github/actions/install-essential-dependences
- - run: sudo apt-get install -y libgoogle-glog-dev automake bison flex
libboost-all-dev libevent-dev libtool pkg-config libibverbs1 libibverbs-dev
+ - run: sudo apt-get install -y libgoogle-glog-dev automake bison flex
libboost-all-dev libevent-dev libtool pkg-config libibverbs1 libibverbs-dev
libunwind8-dev
shell: bash
- run: wget
https://archive.apache.org/dist/thrift/0.11.0/thrift-0.11.0.tar.gz && tar -xf
thrift-0.11.0.tar.gz
shell: bash
diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml
index 14961ebb..1854483a 100644
--- a/.github/workflows/ci-linux.yml
+++ b/.github/workflows/ci-linux.yml
@@ -31,7 +31,7 @@ jobs:
steps:
- uses: actions/checkout@v2
- uses: ./.github/actions/install-essential-dependences
- - name: cmake
+ - name: cmake
run: |
export CC=gcc && export CXX=g++
mkdir build
@@ -47,7 +47,7 @@ jobs:
steps:
- uses: actions/checkout@v2
- run: bazel test --verbose_failures -- //... -//example/...
-
+
gcc-compile-with-boringssl:
runs-on: ubuntu-20.04
steps:
@@ -61,7 +61,7 @@ jobs:
- uses: ./.github/actions/install-all-dependences
- uses: ./.github/actions/init-make-config
with:
- options: --cc=gcc --cxx=g++ --with-thrift --with-glog --with-rdma
--with-debug-bthread-sche-safety --with-debug-lock
+ options: --cc=gcc --cxx=g++ --with-thrift --with-glog --with-rdma
--with-debug-bthread-sche-safety --with-debug-lock --with-bthread-tracer
- name: compile
run: |
make -j ${{env.proc_num}}
@@ -76,7 +76,7 @@ jobs:
export CC=gcc && export CXX=g++
mkdir build
cd build
- cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON
-DWITH_RDMA=ON -DWITH_DEBUG_BTHREAD_SCHE_SAFETY=ON -DWITH_DEBUG_LOCK=ON ..
+ cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON
-DWITH_RDMA=ON -DWITH_DEBUG_BTHREAD_SCHE_SAFETY=ON -DWITH_DEBUG_LOCK=ON
-DWITH_BTHREAD_TRACER=ON ..
- name: compile
run: |
cd build
@@ -105,7 +105,7 @@ jobs:
steps:
- uses: actions/checkout@v2
- uses: ./.github/actions/install-essential-dependences
- - name: cmake
+ - name: cmake
run: |
export CC=clang && export CXX=clang++
mkdir build
@@ -135,7 +135,7 @@ jobs:
- uses: ./.github/actions/install-all-dependences
- uses: ./.github/actions/init-make-config
with:
- options: --cc=clang --cxx=clang++ --with-thrift --with-glog
--with-rdma --with-debug-bthread-sche-safety --with-debug-lock
+ options: --cc=clang --cxx=clang++ --with-thrift --with-glog
--with-rdma --with-debug-bthread-sche-safety --with-debug-lock
--with-bthread-tracer
- name: compile
run: |
make -j ${{env.proc_num}}
@@ -150,7 +150,7 @@ jobs:
export CC=clang && export CXX=clang++
mkdir build
cd build
- cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON
-DWITH_RDMA=ON -DWITH_DEBUG_BTHREAD_SCHE_SAFETY=ON -DWITH_DEBUG_LOCK=ON ..
+ cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON
-DWITH_RDMA=ON -DWITH_DEBUG_BTHREAD_SCHE_SAFETY=ON -DWITH_DEBUG_LOCK=ON
-DWITH_BTHREAD_TRACER=ON ..
- name: compile
run: |
cd build
@@ -165,21 +165,17 @@ jobs:
clang-unittest:
runs-on: ubuntu-20.04
steps:
- - uses: actions/checkout@v2
- - uses: ./.github/actions/install-essential-dependences
- - name: install gtest
- run: |
- sudo apt-get update
- sudo apt-get install -y cmake libgtest-dev gdb
- cd /usr/src/gtest && sudo cmake . && sudo make && sudo mv
lib/libgtest* /usr/lib/
- - uses: ./.github/actions/init-make-config
- with:
- options: --cc=clang --cxx=clang++
- - name: compile tests
- run: |
- cd test
- make -j ${{env.proc_num}}
- - name: run tests
- run: |
- cd test
- sh ./run_tests.sh
+ - uses: actions/checkout@v2
+ - uses: ./.github/actions/install-essential-dependences
+ - uses: ./.github/actions/init-ut-make-config
+ with:
+ options: --cc=clang --cxx=clang++ --with-bthread-tracer
+ - name: compile tests
+ run: |
+ cat config.mk
+ cd test
+ make -j ${{env.proc_num}}
+ - name: run tests
+ run: |
+ cd test
+ sh ./run_tests.sh
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f3688766..a6b7716b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,6 +24,7 @@ option(WITH_BORINGSSL "With BoringSSL" OFF)
option(DEBUG "Print debug logs" OFF)
option(WITH_DEBUG_SYMBOLS "With debug symbols" ON)
option(WITH_THRIFT "With thrift framed protocol supported" OFF)
+option(WITH_BTHREAD_TRACER "With bthread tracer supported" OFF)
option(WITH_SNAPPY "With snappy" OFF)
option(WITH_RDMA "With RDMA" OFF)
option(WITH_DEBUG_BTHREAD_SCHE_SAFETY "With debugging bthread sche safety" OFF)
@@ -81,6 +82,20 @@ if(WITH_THRIFT)
endif()
endif()
+if (WITH_BTHREAD_TRACER)
+ if (NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux") OR NOT
(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64"))
+ message(FATAL_ERROR "bthread tracer is only supported on Linux x86_64
platform")
+ endif()
+ find_path(LIBUNWIND_INCLUDE_PATH NAMES libunwind.h)
+ find_library(LIBUNWIND_LIB NAMES unwind)
+ find_library(LIBUNWIND_X86_64_LIB NAMES unwind-x86_64)
+ if (NOT LIBUNWIND_INCLUDE_PATH OR NOT LIBUNWIND_LIB)
+ message(FATAL_ERROR "Fail to find libunwind, which is needed by
bthread tracer")
+ endif()
+ add_definitions(-DBRPC_BTHREAD_TRACER)
+ include_directories(${LIBUNWIND_INCLUDE_PATH})
+endif ()
+
set(WITH_RDMA_VAL "0")
if(WITH_RDMA)
set(WITH_RDMA_VAL "1")
@@ -319,6 +334,11 @@ if(WITH_SNAPPY)
set(BRPC_PRIVATE_LIBS "${BRPC_PRIVATE_LIBS} -lsnappy")
endif()
+if (WITH_BTHREAD_TRACER)
+ set(DYNAMIC_LIB ${DYNAMIC_LIB} ${LIBUNWIND_LIB} ${LIBUNWIND_X86_64_LIB})
+ set(BRPC_PRIVATE_LIBS "${BRPC_PRIVATE_LIBS} -lunwind -lunwind-x86_64")
+endif()
+
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
set(DYNAMIC_LIB ${DYNAMIC_LIB} rt)
set(BRPC_PRIVATE_LIBS "${BRPC_PRIVATE_LIBS} -lrt")
diff --git a/README_cn.md b/README_cn.md
index 66f194e8..bed6e843 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -38,6 +38,7 @@
* [bthread or not](docs/cn/bthread_or_not.md)
* [thread-local](docs/cn/thread_local.md)
* [Execution Queue](docs/cn/execution_queue.md)
+ * [bthread tracer](docs/cn/bthread_tracer.md)
* Client
* [基础功能](docs/cn/client.md)
* [错误码](docs/cn/error_code.md)
diff --git a/config_brpc.sh b/config_brpc.sh
index fa607292..2563b0e2 100755
--- a/config_brpc.sh
+++ b/config_brpc.sh
@@ -38,11 +38,12 @@ else
LDD=ldd
fi
-TEMP=`getopt -o v: --long
headers:,libs:,cc:,cxx:,with-glog,with-thrift,with-rdma,with-mesalink,with-debug-bthread-sche-safety,with-debug-lock,nodebugsymbols
-n 'config_brpc' -- "$@"`
+TEMP=`getopt -o v: --long
headers:,libs:,cc:,cxx:,with-glog,with-thrift,with-rdma,with-mesalink,with-bthread-tracer,with-debug-bthread-sche-safety,with-debug-lock,nodebugsymbols
-n 'config_brpc' -- "$@"`
WITH_GLOG=0
WITH_THRIFT=0
WITH_RDMA=0
WITH_MESALINK=0
+WITH_BTHREAD_TRACER=0
BRPC_DEBUG_BTHREAD_SCHE_SAFETY=0
DEBUGSYMBOLS=-g
BRPC_DEBUG_LOCK=0
@@ -69,6 +70,7 @@ while true; do
--with-thrift) WITH_THRIFT=1; shift 1 ;;
--with-rdma) WITH_RDMA=1; shift 1 ;;
--with-mesalink) WITH_MESALINK=1; shift 1 ;;
+ --with-bthread-tracer) WITH_BTHREAD_TRACER=1; shift 1 ;;
--with-debug-bthread-sche-safety ) BRPC_DEBUG_BTHREAD_SCHE_SAFETY=1;
shift 1 ;;
--with-debug-lock ) BRPC_DEBUG_LOCK=1; shift 1 ;;
--nodebugsymbols ) DEBUGSYMBOLS=; shift 1 ;;
@@ -352,8 +354,27 @@ fi
LEVELDB_HDR=$(find_dir_of_header_or_die leveldb/db.h)
-HDRS=$($ECHO
"$GFLAGS_HDR\n$PROTOBUF_HDR\n$ABSL_HDR\n$LEVELDB_HDR\n$OPENSSL_HDR" | sort |
uniq)
-LIBS=$($ECHO
"$GFLAGS_LIB\n$PROTOBUF_LIB\n$ABSL_LIB\n$LEVELDB_LIB\n$OPENSSL_LIB\n$SNAPPY_LIB"
| sort | uniq)
+CPPFLAGS=
+
+if [ $WITH_BTHREAD_TRACER != 0 ]; then
+ if [ "$SYSTEM" != "Linux" ] || [ "$(uname -m)" != "x86_64" ]; then
+ >&2 $ECHO "bthread tracer is only supported on Linux x86_64 platform"
+ exit 1
+ fi
+ LIBUNWIND_HDR=$(find_dir_of_header_or_die libunwind.h)
+ LIBUNWIND_LIB=$(find_dir_of_lib_or_die unwind)
+
+ CPPFLAGS="${CPPFLAGS} -DBRPC_BTHREAD_TRACER"
+
+ if [ -f "$LIBUNWIND_LIB/libunwind.$SO" ]; then
+ DYNAMIC_LINKINGS="$DYNAMIC_LINKINGS -lunwind -lunwind-x86_64"
+ else
+ STATIC_LINKINGS="$STATIC_LINKINGS -lunwind -lunwind-x86_64"
+ fi
+fi
+
+HDRS=$($ECHO
"$LIBUNWIND_HDR\n$GFLAGS_HDR\n$PROTOBUF_HDR\n$ABSL_HDR\n$LEVELDB_HDR\n$OPENSSL_HDR"
| sort | uniq)
+LIBS=$($ECHO
"$LIBUNWIND_LIB\n$GFLAGS_LIB\n$PROTOBUF_LIB\n$ABSL_LIB\n$LEVELDB_LIB\n$OPENSSL_LIB\n$SNAPPY_LIB"
| sort | uniq)
absent_in_the_list() {
TMP=`$ECHO "$1\n$2" | sort | uniq`
@@ -411,7 +432,7 @@ append_to_output "STATIC_LINKINGS=$STATIC_LINKINGS"
append_to_output "DYNAMIC_LINKINGS=$DYNAMIC_LINKINGS"
# CPP means C PreProcessing, not C PlusPlus
-CPPFLAGS="-DBRPC_WITH_GLOG=$WITH_GLOG -DGFLAGS_NS=$GFLAGS_NS
-DBRPC_DEBUG_BTHREAD_SCHE_SAFETY=$BRPC_DEBUG_BTHREAD_SCHE_SAFETY
-DBRPC_DEBUG_LOCK=$BRPC_DEBUG_LOCK"
+CPPFLAGS="${CPPFLAGS} -DBRPC_WITH_GLOG=$WITH_GLOG -DGFLAGS_NS=$GFLAGS_NS
-DBRPC_DEBUG_BTHREAD_SCHE_SAFETY=$BRPC_DEBUG_BTHREAD_SCHE_SAFETY
-DBRPC_DEBUG_LOCK=$BRPC_DEBUG_LOCK"
# Avoid over-optimizations of TLS variables by GCC>=4.8
# See: https://github.com/apache/brpc/issues/1693
diff --git a/docs/cn/bthread_tracer.md b/docs/cn/bthread_tracer.md
new file mode 100644
index 00000000..7758bee1
--- /dev/null
+++ b/docs/cn/bthread_tracer.md
@@ -0,0 +1,79 @@
+The main drawbacks of gdb (ptrace) + gdb_bthread_stack.py are that it is slow and blocks the process, so an efficient way to trace bthread call stacks is needed.
+
+bRPC's cooperative user-space coroutines cannot implement an efficient STW (Stop The World) the way Golang's built-in preemptive goroutines do, and the framework cannot interfere with the execution of user logic, which makes tracing bthread call stacks fairly difficult.
+
+Tracing bthread call stacks online requires solving two problems:
+1. Tracing the call stack of a suspended bthread.
+2. Tracing the call stack of a running bthread.
+
+# bthread status model
+
+The current bthread status model is shown below.
+
+![bthread status model](../images/bthread_status_model.svg)
+
+# Design
+
+## Core idea
+
+To solve the two problems above, the design implements STB (Stop The Bthread). The core idea is that while a bthread's call stack is being traced, its status must not transition into a state that the current tracing mode does not support. STB provides two tracing modes: a context tracing mode and a signal tracing mode.
+
+### Context tracing mode
+The context tracing mode traces the call stack of a suspended bthread. The stack of a suspended bthread is stable, so the context saved in TaskMeta.stack (on x86_64 the key registers are RIP, RSP and RBP) can be handed to a library that unwinds a call stack from a given context. However, a suspended bthread may be woken up at any time and start executing (including jump_stack), after which its stack keeps changing. An unstable context cannot be used for stack tracing, so the bthread's scheduling must be intercepted before jump_stack and resumed only after the trace completes. Therefore, the context tracing mode supports the ready and suspended states.
+
+### Signal tracing mode
+
+The signal tracing mode traces the call stack of a running bthread. A running bthread is not stable, so TaskMeta.stack cannot be used to trace it. The alternative is to interrupt the running bthread with a signal and unwind its call stack inside the signal handler. Using signals raises two issues:
+
+1. Async-signal safety.
+2. The signal tracing mode does not support jump_stack. Unwinding needs register state and jump_stack manipulates registers, so that window is unsafe and must not be interrupted by the signal; the bthread's scheduling must be intercepted before jump_stack, and the bthread is suspended only after the trace completes.
+
+Therefore, the signal tracing mode only supports the running state.
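+
+For readers unfamiliar with libunwind, the sketch below illustrates the kind of unwinding the signal tracing mode performs inside its signal handler, using libunwind's local API on the current context. It is only an illustration, not TaskTracer's actual implementation, which unwinds the interrupted bthread's context and respects `signal_trace_timeout_ms`.
+```c++
+// Example build (assumption): g++ -std=c++11 unwind_demo.cpp -lunwind
+#define UNW_LOCAL_ONLY
+#include <libunwind.h>
+#include <cstdio>
+
+static void dump_current_stack() {
+    unw_context_t ctx;
+    unw_cursor_t cursor;
+    unw_getcontext(&ctx);           // capture the current register context
+    unw_init_local(&cursor, &ctx);  // unwind within the current process
+    while (unw_step(&cursor) > 0) {
+        unw_word_t ip = 0;
+        unw_word_t offset = 0;
+        char name[256];
+        unw_get_reg(&cursor, UNW_REG_IP, &ip);
+        if (unw_get_proc_name(&cursor, name, sizeof(name), &offset) != 0) {
+            name[0] = '\0';         // symbol name unavailable
+        }
+        std::printf("%#lx %s+%#lx\n", (unsigned long)ip, name, (unsigned long)offset);
+    }
+}
+```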
+
+### Summary
+
+jump_stack is the mandatory path for a bthread to be suspended or resumed, and it is also STB's interception point. STB divides the states into three groups:
+1. States supported by the context tracing mode: ready, suspended.
+2. States supported by the signal tracing mode: running.
+3. States where tracing is not supported. Neither tracing mode may be used during jump_stack, so the bthread's scheduling must be intercepted before jump_stack and resumed only after the trace completes.
+
+### Detailed flow
+
+The bthread status model after introducing STB is shown below; on top of the original model, two states (interception points) are added: about-to-run and suspending.
+
+![bthread STB model](../images/bthread_stb_model.svg)
+
+Based on the analysis above, the STB flow is:
+
+1. When TaskTracer (the module implementing STB) receives a request to trace a bthread's call stack, it marks the bthread as being traced; when the trace finishes, it marks it as done and signals any bthread that may be waiting in the about-to-run or suspending state. Depending on the bthread's status, TaskTracer acts differently:
+- created, ready but not yet assigned a stack, destroyed: finish the trace immediately.
+- suspended, ready: trace the bthread's call stack in the context tracing mode.
+- running: trace the bthread's call stack in the signal tracing mode.
+- about-to-run, suspending: TaskTracer spins until the bthread transitions to the next state (suspended or running), then continues the trace.
+
+2. While TaskTracer is tracing, the bthread also acts according to its status:
+- created, ready but not yet assigned a stack, ready: no extra handling is needed.
+- suspended, running: notify TaskTracer to continue the trace.
+- about-to-run, suspending, destroyed: the bthread waits on a condition variable until TaskTracer finishes the trace; TaskTracer then notifies the bthread through the condition variable so that it can proceed with jump_stack.
+
+# Usage
+
+1. Download and install libunwind.
+2. Add the `--with-bthread-tracer` option to config_brpc.sh, or the `-DWITH_BTHREAD_TRACER=ON` option to cmake.
+3. Visit the server's builtin service at `http://ip:port/bthreads/<bthread_id>?st=1`, or call the `bthread::stack_trace()` function in code.
+4. To trace a pthread's call stack, call `bthread::init_for_pthread_stack_trace()` on that pthread to get a fake bthread_t, then use step 3 to obtain the pthread's call stack (see the code sketch after the sample output below).
+
+Below is sample output from tracing a bthread call stack:
+```shell
+#0 0x00007fdbbed500b5 __clock_gettime_2
+#1 0x000000000041f2b6 butil::cpuwide_time_ns()
+#2 0x000000000041f289 butil::cpuwide_time_us()
+#3 0x000000000041f1b9 butil::EveryManyUS::operator bool()
+#4 0x0000000000413289 (anonymous namespace)::spin_and_log()
+#5 0x00007fdbbfa58dc0 bthread::TaskGroup::task_runner()
+```
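+
+A minimal C++ sketch of steps 3 and 4, assuming brpc is built (and the application compiled) with `--with-bthread-tracer`, which defines `BRPC_BTHREAD_TRACER`; `sleeping_task` and the one-second sleep are placeholders:
+```c++
+#include <iostream>
+#include <bthread/bthread.h>
+
+static void* sleeping_task(void*) {
+    bthread_usleep(1000 * 1000);  // keep the bthread alive for a while
+    return NULL;
+}
+
+int main() {
+    // Starting a bthread also creates TaskControl, which
+    // init_for_pthread_stack_trace() and stack_trace() require.
+    bthread_t th;
+    bthread_start_background(&th, NULL, sleeping_task, NULL);
+
+    // Step 3: print the call stack of the bthread we just started.
+    std::cout << bthread::stack_trace(th) << std::endl;
+
+    // Step 4: register the current pthread, then trace it like a bthread.
+    bthread_t fake_tid = bthread::init_for_pthread_stack_trace();
+    if (fake_tid != INVALID_BTHREAD) {
+        bthread::stack_trace(std::cout, fake_tid);
+    }
+
+    bthread_join(th, NULL);
+    return 0;
+}
+```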
+
+# Related flags
+
+- `enable_fast_unwind`: whether to enable fast unwinding, true by default. In most cases there is no need to turn it off; only if function names in the call stack you care about fail to resolve and show up as `<unknown>` should you try disabling it, at the cost of performance. For a call stack of about 30 frames, fast unwinding usually finishes within 200us, while unwinding with it disabled takes about 4ms, nearly 20 times slower.
+- `signal_trace_timeout_ms`: timeout of the signal tracing mode, 50ms by default. Although the libunwind documentation states that unwinding is async-signal-safe, [the gperftools community found that libunwind can deadlock in some cases](https://github.com/gperftools/gperftools/issues/775), so TaskTracer sets a timeout and gives up the trace once it expires to break any deadlock.
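+
+These are ordinary gflags, so they can be set on the command line (for example `--enable_fast_unwind=false --signal_trace_timeout_ms=100`, assuming the program parses flags) or adjusted programmatically as in the sketch below; the values are illustrative only. `GFLAGS_NS` is the macro brpc's build defines for the gflags namespace; substitute `google` or `gflags` when building against gflags directly.
+```c++
+#include <string>
+#include <gflags/gflags.h>
+
+// Loosen the tracer flags at runtime; SetCommandLineOption returns an empty
+// string if the flag is unknown, e.g. when brpc was built without
+// --with-bthread-tracer.
+static void tune_tracer_flags() {
+    GFLAGS_NS::SetCommandLineOption("enable_fast_unwind", "false");
+    GFLAGS_NS::SetCommandLineOption("signal_trace_timeout_ms", "100");
+}
+```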
\ No newline at end of file
diff --git a/docs/cn/getting_started.md b/docs/cn/getting_started.md
index 13f84805..76aa6f86 100644
--- a/docs/cn/getting_started.md
+++ b/docs/cn/getting_started.md
@@ -382,7 +382,7 @@ brpc默认**不**链接
[tcmalloc](http://goog-perftools.sourceforge.net/doc/tcm
## glog: 3.3+
-brpc实现了一个默认的[日志功能](../../src/butil/logging.h)它和glog冲突。要替换成glog,可以给config_brpc.sh增加*--with-glog*选项或者给cmake增加`-DWITH_GLOG=ON`选项。
+brpc实现了一个默认的[日志功能](../../src/butil/logging.h)它和glog冲突。要替换成glog,可以给config_brpc.sh增加`--with-glog`选项或者给cmake增加`-DWITH_GLOG=ON`选项。
## valgrind: 3.8+
@@ -392,6 +392,12 @@ brpc会自动检测valgrind(然后注册bthread的栈)。不支持老版本
无已知问题。
+## libunwind: 1.3-1.8.1
+
+bRPC默认**不**链接
[libunwind](https://github.com/libunwind/libunwind)。用户需要追踪bthread功能则链接libunwind,可以给config_brpc.sh增加`--with-bthread-tracer`选项或者给cmake增加`-DWITH_BTHREAD_TRACER=ON`选项。
+
+建议使用最新版本的libunwind。
+
# 实例追踪
我们提供了一个程序去帮助你追踪和监控所有brpc实例。 只需要在某处运行
[trackme_server](https://github.com/apache/brpc/tree/master/tools/trackme_server/)
然后再带着
-trackme_server=SERVER参数启动需要被追踪的实例。trackme_server将从实例周期性地收到ping消息然后打印日志。您可以从日志中聚合实例地址,并调用实例的内置服务以获取更多信息。
diff --git a/docs/en/getting_started.md b/docs/en/getting_started.md
index a8197620..a2ae80d4 100644
--- a/docs/en/getting_started.md
+++ b/docs/en/getting_started.md
@@ -377,7 +377,7 @@ When you remove tcmalloc, not only remove the linkage with
tcmalloc but also the
## glog: 3.3+
-brpc implements a default [logging utility](../../src/butil/logging.h) which
conflicts with glog. To replace this with glog, add *--with-glog* to
config_brpc.sh or add `-DWITH_GLOG=ON` to cmake.
+brpc implements a default [logging utility](../../src/butil/logging.h) which
conflicts with glog. To replace this with glog, add `--with-glog` to
config_brpc.sh or add `-DWITH_GLOG=ON` to cmake.
## valgrind: 3.8+
@@ -385,6 +385,12 @@ brpc detects valgrind automatically (and registers stacks
of bthread). Older val
## thrift: 0.9.3-0.11.0
+## libunwind: 1.3-1.8.1
+
+brpc does **not** link [libunwind](https://github.com/libunwind/libunwind) by
default. Users who need bthread tracing can link libunwind by adding
`--with-bthread-tracer` to config_brpc.sh or adding `-DWITH_BTHREAD_TRACER=ON` to cmake.
+
+It is recommended to use the latest possible version of libunwind.
+
no known issues.
# Track instances
diff --git a/docs/images/bthread_status_model.svg
b/docs/images/bthread_status_model.svg
new file mode 100644
index 00000000..46379c96
--- /dev/null
+++ b/docs/images/bthread_status_model.svg
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="1045px"
height="669px" viewBox="-0.5 -0.5 1045 669" style="background-color: rgb(255,
255, 255);"><defs/><rect fill="#ffffff" width="100%" height="100%" x="0"
y="0"/><g><g data-cell-id="0"><g data-cell-id="1"><g
data-cell-id="DboKUKvH6xi3xOyzVJv4-1887"><g><rect x="151" y="153" width="150"
height="60" rx="4.2" ry="4.2" fill="#e85642" stroke="none"
pointer-events="all"/></g><g><g transfo [...]
\ No newline at end of file
diff --git a/docs/images/bthread_stb_model.svg
b/docs/images/bthread_stb_model.svg
new file mode 100644
index 00000000..5dcc3683
--- /dev/null
+++ b/docs/images/bthread_stb_model.svg
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="1059px"
height="779px" viewBox="-0.5 -0.5 1059 779" style="background-color: rgb(255,
255, 255);"><defs/><rect fill="#ffffff" width="100%" height="100%" x="0"
y="0"/><g><g data-cell-id="g0N0ArMlp0iuiEGkKfUx-0"><g
data-cell-id="g0N0ArMlp0iuiEGkKfUx-1"><g
data-cell-id="g0N0ArMlp0iuiEGkKfUx-35"><g><rect x="168" y="113" width="150"
height="60" rx="4.2" ry="4.2" fill="#e85642" stroke="none" [...]
\ No newline at end of file
diff --git a/src/brpc/builtin/bthreads_service.cpp
b/src/brpc/builtin/bthreads_service.cpp
index 209f7a99..7676f656 100644
--- a/src/brpc/builtin/bthreads_service.cpp
+++ b/src/brpc/builtin/bthreads_service.cpp
@@ -16,14 +16,14 @@
// under the License.
-#include <ostream>
+#include "bthread/bthread.h"
#include "brpc/closure_guard.h" // ClosureGuard
#include "brpc/controller.h" // Controller
#include "brpc/builtin/common.h"
#include "brpc/builtin/bthreads_service.h"
namespace bthread {
-void print_task(std::ostream& os, bthread_t tid);
+extern void print_task(std::ostream& os, bthread_t tid);
}
@@ -40,12 +40,24 @@ void
BthreadsService::default_method(::google::protobuf::RpcController* cntl_bas
const std::string& constraint = cntl->http_request().unresolved_path();
if (constraint.empty()) {
+#ifdef BRPC_BTHREAD_TRACER
+ os << "Use /bthreads/<bthread_id> or /bthreads/<bthread_id>?st=1 for
stack trace";
+#else
os << "Use /bthreads/<bthread_id>";
+#endif // BRPC_BTHREAD_TRACER
} else {
char* endptr = NULL;
bthread_t tid = strtoull(constraint.c_str(), &endptr, 10);
- if (*endptr == '\0' || *endptr == '/') {
+ if (*endptr == '\0' || *endptr == '/' || *endptr == '?') {
::bthread::print_task(os, tid);
+
+#ifdef BRPC_BTHREAD_TRACER
+ const std::string* st = cntl->http_request().uri().GetQuery("st");
+ if (NULL != st && *st == "1") {
+ os << "\nbthread call stack:\n";
+ ::bthread::stack_trace(os, tid);
+ }
+#endif // BRPC_BTHREAD_TRACER
} else {
cntl->SetFailed(ENOMETHOD, "path=%s is not a bthread id",
constraint.c_str());
diff --git a/src/brpc/input_messenger.cpp b/src/brpc/input_messenger.cpp
index d6e1c356..b28c7044 100644
--- a/src/brpc/input_messenger.cpp
+++ b/src/brpc/input_messenger.cpp
@@ -287,8 +287,7 @@ int InputMessenger::ProcessNewMessage(
// This unique_ptr prevents msg to be lost before transfering
// ownership to last_msg
DestroyingPtr<InputMessageBase> msg(pr.message());
- QueueMessage(last_msg.release(), &num_bthread_created,
- m->_keytable_pool);
+ QueueMessage(last_msg.release(), &num_bthread_created,
m->_keytable_pool);
if (_handlers[index].process == NULL) {
LOG(ERROR) << "process of index=" << index << " is NULL";
continue;
diff --git a/src/brpc/reloadable_flags.cpp b/src/brpc/reloadable_flags.cpp
index 5f5ca026..958dc057 100644
--- a/src/brpc/reloadable_flags.cpp
+++ b/src/brpc/reloadable_flags.cpp
@@ -15,10 +15,6 @@
// specific language governing permissions and limitations
// under the License.
-
-#include <unistd.h> // write, _exit
-#include <gflags/gflags.h>
-#include "butil/macros.h"
#include "brpc/reloadable_flags.h"
namespace brpc {
@@ -62,37 +58,25 @@ bool NonNegativeInteger(const char*, int64_t val) {
return val >= 0;
}
-template <typename T>
-static bool RegisterFlagValidatorOrDieImpl(
- const T* flag, bool (*validate_fn)(const char*, T val)) {
- if (GFLAGS_NS::RegisterFlagValidator(flag, validate_fn)) {
- return true;
- }
- // Error printed by gflags does not have newline. Add one to it.
- char newline = '\n';
- butil::ignore_result(write(2, &newline, 1));
- _exit(1);
-}
-
bool RegisterFlagValidatorOrDie(const bool* flag,
bool (*validate_fn)(const char*, bool)) {
- return RegisterFlagValidatorOrDieImpl(flag, validate_fn);
+ return butil::RegisterFlagValidatorOrDieImpl(flag, validate_fn);
}
bool RegisterFlagValidatorOrDie(const int32_t* flag,
bool (*validate_fn)(const char*, int32_t)) {
- return RegisterFlagValidatorOrDieImpl(flag, validate_fn);
+ return butil::RegisterFlagValidatorOrDieImpl(flag, validate_fn);
}
bool RegisterFlagValidatorOrDie(const int64_t* flag,
bool (*validate_fn)(const char*, int64_t)) {
- return RegisterFlagValidatorOrDieImpl(flag, validate_fn);
+ return butil::RegisterFlagValidatorOrDieImpl(flag, validate_fn);
}
bool RegisterFlagValidatorOrDie(const uint64_t* flag,
bool (*validate_fn)(const char*, uint64_t)) {
- return RegisterFlagValidatorOrDieImpl(flag, validate_fn);
+ return butil::RegisterFlagValidatorOrDieImpl(flag, validate_fn);
}
bool RegisterFlagValidatorOrDie(const double* flag,
bool (*validate_fn)(const char*, double)) {
- return RegisterFlagValidatorOrDieImpl(flag, validate_fn);
+ return butil::RegisterFlagValidatorOrDieImpl(flag, validate_fn);
}
} // namespace brpc
diff --git a/src/brpc/reloadable_flags.h b/src/brpc/reloadable_flags.h
index c451bde8..60b7851f 100644
--- a/src/brpc/reloadable_flags.h
+++ b/src/brpc/reloadable_flags.h
@@ -19,28 +19,9 @@
#ifndef BRPC_RELOADABLE_FLAGS_H
#define BRPC_RELOADABLE_FLAGS_H
-// To brpc developers: This is a header included by user, don't depend
-// on internal structures, use opaque pointers instead.
-
-#include <stdint.h>
-
-// Register an always-true valiator to a gflag so that the gflag is treated as
-// reloadable by brpc. If a validator exists, abort the program.
-// You should call this macro within global scope. for example:
-//
-// DEFINE_int32(foo, 0, "blah blah");
-// BRPC_VALIDATE_GFLAG(foo, brpc::PassValidate);
-//
-// This macro does not work for string-flags because they're thread-unsafe to
-// modify directly. To emphasize this, you have to write the validator by
-// yourself and use GFLAGS_NS::GetCommandLineOption() to acess the flag.
-#define BRPC_VALIDATE_GFLAG(flag, validate_fn) \
- namespace brpc_flags {} \
- const int register_FLAGS_ ## flag ## _dummy \
- __attribute__((__unused__)) = \
- ::brpc::RegisterFlagValidatorOrDie( \
- &FLAGS_##flag, (validate_fn))
+#include "butil/reloadable_flags.h"
+#define BRPC_VALIDATE_GFLAG(flag, validate_fn) BUTIL_VALIDATE_GFLAG(flag,
validate_fn)
namespace brpc {
@@ -59,18 +40,18 @@ extern bool PositiveInteger(const char*, uint64_t);
extern bool NonNegativeInteger(const char*, int32_t);
extern bool NonNegativeInteger(const char*, int64_t);
-extern bool RegisterFlagValidatorOrDie(const bool* flag,
- bool (*validate_fn)(const char*, bool));
-extern bool RegisterFlagValidatorOrDie(const int32_t* flag,
- bool (*validate_fn)(const char*, int32_t));
-extern bool RegisterFlagValidatorOrDie(const uint32_t* flag,
- bool (*validate_fn)(const char*, uint32_t));
-extern bool RegisterFlagValidatorOrDie(const int64_t* flag,
- bool (*validate_fn)(const char*, int64_t));
-extern bool RegisterFlagValidatorOrDie(const uint64_t* flag,
- bool (*validate_fn)(const char*, uint64_t));
-extern bool RegisterFlagValidatorOrDie(const double* flag,
- bool (*validate_fn)(const char*, double));
+extern bool RegisterFlagValidatorOrDie(
+ const bool* flag, bool (*validate_fn)(const char*, bool));
+extern bool RegisterFlagValidatorOrDie(
+ const int32_t* flag, bool (*validate_fn)(const char*, int32_t));
+extern bool RegisterFlagValidatorOrDie(
+ const uint32_t* flag, bool (*validate_fn)(const char*, uint32_t));
+extern bool RegisterFlagValidatorOrDie(
+ const int64_t* flag, bool (*validate_fn)(const char*, int64_t));
+extern bool RegisterFlagValidatorOrDie(
+ const uint64_t* flag, bool (*validate_fn)(const char*, uint64_t));
+extern bool RegisterFlagValidatorOrDie(
+ const double* flag, bool (*validate_fn)(const char*, double));
} // namespace brpc
diff --git a/src/brpc/shared_object.h b/src/brpc/shared_object.h
index d8ff9aae..296bdea3 100644
--- a/src/brpc/shared_object.h
+++ b/src/brpc/shared_object.h
@@ -19,53 +19,11 @@
#ifndef BRPC_SHARED_OBJECT_H
#define BRPC_SHARED_OBJECT_H
-#include "butil/intrusive_ptr.hpp" // butil::intrusive_ptr
-#include "butil/atomicops.h"
-
+#include "butil/shared_object.h"
namespace brpc {
-// Inherit this class to be intrusively shared. Comparing to shared_ptr,
-// intrusive_ptr saves one malloc (for shared_count) and gets better cache
-// locality when the ref/deref are frequent, in the cost of lack of weak_ptr
-// and worse interface.
-class SharedObject {
-friend void intrusive_ptr_add_ref(SharedObject*);
-friend void intrusive_ptr_release(SharedObject*);
-
-public:
- SharedObject() : _nref(0) { }
- int ref_count() const { return _nref.load(butil::memory_order_relaxed); }
-
- // Add ref and returns the ref_count seen before added.
- // The effect is basically same as butil::intrusive_ptr<T>(obj).detach()
- // except that the latter one does not return the seen ref_count which is
- // useful in some scenarios.
- int AddRefManually()
- { return _nref.fetch_add(1, butil::memory_order_relaxed); }
-
- // Remove one ref, if the ref_count hit zero, delete this object.
- // Same as butil::intrusive_ptr<T>(obj, false).reset(NULL)
- void RemoveRefManually() {
- if (_nref.fetch_sub(1, butil::memory_order_release) == 1) {
- butil::atomic_thread_fence(butil::memory_order_acquire);
- delete this;
- }
- }
-
-protected:
- virtual ~SharedObject() { }
-private:
- butil::atomic<int> _nref;
-};
-
-inline void intrusive_ptr_add_ref(SharedObject* obj) {
- obj->AddRefManually();
-}
-
-inline void intrusive_ptr_release(SharedObject* obj) {
- obj->RemoveRefManually();
-}
+using butil::SharedObject;
} // namespace brpc
diff --git a/src/bthread/bthread.cpp b/src/bthread/bthread.cpp
index b834b42a..3de72657 100644
--- a/src/bthread/bthread.cpp
+++ b/src/bthread/bthread.cpp
@@ -19,9 +19,11 @@
// Date: Tue Jul 10 17:40:58 CST 2012
+#include <sys/syscall.h>
#include <gflags/gflags.h>
#include "butil/macros.h" // BAIDU_CASSERT
#include "butil/logging.h"
+#include "butil/thread_local.h"
#include "bthread/task_group.h" // TaskGroup
#include "bthread/task_control.h" // TaskControl
#include "bthread/timer_thread.h"
@@ -115,6 +117,79 @@ inline TaskControl* get_or_new_task_control() {
return c;
}
+#ifdef BRPC_BTHREAD_TRACER
+BAIDU_THREAD_LOCAL TaskMeta* pthread_fake_meta = NULL;
+
+bthread_t init_for_pthread_stack_trace() {
+ if (NULL != pthread_fake_meta) {
+ return pthread_fake_meta->tid;
+ }
+
+ TaskControl* c = get_task_control();
+ if (NULL == c) {
+ LOG(ERROR) << "TaskControl has not been created, "
+ "please use bthread_start_xxx before call this function";
+ return INVALID_BTHREAD;
+ }
+
+ butil::ResourceId<TaskMeta> slot;
+ pthread_fake_meta = butil::get_resource(&slot);
+ if (BAIDU_UNLIKELY(NULL == pthread_fake_meta)) {
+ LOG(ERROR) << "Fail to get TaskMeta";
+ return INVALID_BTHREAD;
+ }
+
+ pthread_fake_meta->attr = BTHREAD_ATTR_PTHREAD;
+ pthread_fake_meta->tid = make_tid(*pthread_fake_meta->version_butex, slot);
+ // Make TaskTracer use signal trace mode for pthread.
+ c->_task_tracer.set_running_status(syscall(SYS_gettid), pthread_fake_meta);
+
+ // Release the TaskMeta at exit of pthread.
+ butil::thread_atexit([]() {
+ // Similar to TaskGroup::task_runner.
+ bool tracing;
+ {
+ BAIDU_SCOPED_LOCK(pthread_fake_meta->version_lock);
+ tracing = TaskTracer::set_end_status_unsafe(pthread_fake_meta);
+ // If resulting version is 0,
+ // change it to 1 to make bthread_t never be 0.
+ if (0 == ++*pthread_fake_meta->version_butex) {
+ ++*pthread_fake_meta->version_butex;
+ }
+ }
+
+ if (tracing) {
+ // Wait for tracing completion.
+ get_task_control()->_task_tracer.WaitForTracing(pthread_fake_meta);
+ }
+ get_task_control()->_task_tracer.set_status(
+ TASK_STATUS_UNKNOWN, pthread_fake_meta);
+
+ butil::return_resource(get_slot(pthread_fake_meta->tid));
+ pthread_fake_meta = NULL;
+ });
+
+ return pthread_fake_meta->tid;
+}
+
+void stack_trace(std::ostream& os, bthread_t tid) {
+ TaskControl* c = get_task_control();
+ if (NULL == c) {
+ os << "TaskControl has not been created";
+ return;
+ }
+ c->stack_trace(os, tid);
+}
+
+std::string stack_trace(bthread_t tid) {
+ TaskControl* c = get_task_control();
+ if (NULL == c) {
+ return "TaskControl has not been created";
+ }
+ return c->stack_trace(tid);
+}
+#endif // BRPC_BTHREAD_TRACER
+
static int add_workers_for_each_tag(int num) {
int added = 0;
auto c = get_task_control();
diff --git a/src/bthread/bthread.h b/src/bthread/bthread.h
index 72b32ad9..7e42c96c 100644
--- a/src/bthread/bthread.h
+++ b/src/bthread/bthread.h
@@ -28,12 +28,25 @@
#include "bthread/errno.h"
#if defined(__cplusplus)
-# include <iostream>
-# include "bthread/mutex.h" // use bthread_mutex_t in the RAII way
-#endif
+#include <iostream>
+#include "bthread/mutex.h" // use bthread_mutex_t in the RAII way
+#endif // __cplusplus
#include "bthread/id.h"
+#if defined(__cplusplus) && defined(BRPC_BTHREAD_TRACER)
+namespace bthread {
+// Assign a TaskMeta to the pthread and set the state to Running,
+// so that `stack_trace()' can trace the call stack of the pthread.
+bthread_t init_for_pthread_stack_trace();
+
+// Trace the call stack of the bthread, or pthread which has been
+// initialized by `init_for_pthread_stack_trace()'.
+void stack_trace(std::ostream& os, bthread_t tid);
+std::string stack_trace(bthread_t tid);
+} // namespace bthread
+#endif // __cplusplus && BRPC_BTHREAD_TRACER
+
__BEGIN_DECLS
// Create bthread `fn(args)' with attributes `attr' and put the identifier into
diff --git a/src/bthread/butex.cpp b/src/bthread/butex.cpp
index 25cb9539..c54198c6 100644
--- a/src/bthread/butex.cpp
+++ b/src/bthread/butex.cpp
@@ -283,11 +283,11 @@ inline TaskGroup* get_task_group(TaskControl* c,
bthread_tag_t tag) {
return is_same_tag(tag) ? tls_task_group : c->choose_one_group(tag);
}
-inline void run_in_local_task_group(TaskGroup* g, bthread_t tid, bool
nosignal) {
+inline void run_in_local_task_group(TaskGroup* g, TaskMeta* next_meta, bool
nosignal) {
if (!nosignal) {
- TaskGroup::exchange(&g, tid);
+ TaskGroup::exchange(&g, next_meta);
} else {
- g->ready_to_run(tid, nosignal);
+ g->ready_to_run(next_meta, nosignal);
}
}
@@ -311,9 +311,9 @@ int butex_wake(void* arg, bool nosignal) {
unsleep_if_necessary(bbw, get_global_timer_thread());
TaskGroup* g = get_task_group(bbw->control, bbw->tag);
if (g == tls_task_group) {
- run_in_local_task_group(g, bbw->tid, nosignal);
+ run_in_local_task_group(g, bbw->task_meta, nosignal);
} else {
- g->ready_to_run_remote(bbw->tid, check_nosignal(nosignal, g->tag()));
+ g->ready_to_run_remote(bbw->task_meta, check_nosignal(nosignal,
g->tag()));
}
return 1;
}
@@ -363,7 +363,7 @@ int butex_wake_n(void* arg, size_t n, bool nosignal) {
w->RemoveFromList();
unsleep_if_necessary(w, get_global_timer_thread());
auto g = get_task_group(w->control, w->tag);
- g->ready_to_run_general(w->tid, true);
+ g->ready_to_run_general(w->task_meta, true);
nwakeups[g->tag()] = g;
++nwakeup;
}
@@ -375,9 +375,9 @@ int butex_wake_n(void* arg, size_t n, bool nosignal) {
}
auto g = get_task_group(next->control, next->tag);
if (g == tls_task_group) {
- run_in_local_task_group(g, next->tid, nosignal);
+ run_in_local_task_group(g, next->task_meta, nosignal);
} else {
- g->ready_to_run_remote(next->tid, check_nosignal(nosignal, g->tag()));
+ g->ready_to_run_remote(next->task_meta, check_nosignal(nosignal,
g->tag()));
}
return nwakeup;
}
@@ -436,7 +436,7 @@ int butex_wake_except(void* arg, bthread_t
excluded_bthread) {
w->RemoveFromList();
unsleep_if_necessary(w, get_global_timer_thread());
auto g = get_task_group(w->control, w->tag);
- g->ready_to_run_general(w->tid, true);
+ g->ready_to_run_general(w->task_meta, true);
nwakeups[g->tag()] = g;
++nwakeup;
} while (!bthread_waiters.empty());
@@ -480,9 +480,9 @@ int butex_requeue(void* arg, void* arg2) {
unsleep_if_necessary(bbw, get_global_timer_thread());
auto g = is_same_tag(bbw->tag) ? tls_task_group : NULL;
if (g) {
- TaskGroup::exchange(&g, front->tid);
+ TaskGroup::exchange(&g, bbw->task_meta);
} else {
-
bbw->control->choose_one_group(bbw->tag)->ready_to_run_remote(front->tid);
+
bbw->control->choose_one_group(bbw->tag)->ready_to_run_remote(bbw->task_meta);
}
return 1;
}
@@ -520,7 +520,7 @@ inline bool erase_from_butex(ButexWaiter* bw, bool wakeup,
WaiterState state) {
if (erased && wakeup) {
if (bw->tid) {
ButexBthreadWaiter* bbw = static_cast<ButexBthreadWaiter*>(bw);
- get_task_group(bbw->control,
bbw->tag)->ready_to_run_general(bw->tid);
+ get_task_group(bbw->control,
bbw->tag)->ready_to_run_general(bbw->task_meta);
} else {
ButexPthreadWaiter* pw = static_cast<ButexPthreadWaiter*>(bw);
wakeup_pthread(pw);
@@ -535,7 +535,7 @@ struct WaitForButexArgs {
bool prepend;
};
-static void wait_for_butex(void* arg) {
+void wait_for_butex(void* arg) {
auto args = static_cast<WaitForButexArgs*>(arg);
ButexBthreadWaiter* const bw = args->bw;
Butex* const b = bw->initial_butex;
@@ -565,6 +565,9 @@ static void wait_for_butex(void* arg) {
b->waiters.Append(bw);
}
bw->container.store(b, butil::memory_order_relaxed);
+#ifdef BRPC_BTHREAD_TRACER
+ bw->control->_task_tracer.set_status(TASK_STATUS_SUSPENDED,
bw->task_meta);
+#endif // BRPC_BTHREAD_TRACER
if (bw->abstime != NULL) {
bw->sleep_id = get_global_timer_thread()->schedule(
erase_from_butex_and_wakeup, bw, *bw->abstime);
@@ -582,7 +585,7 @@ static void wait_for_butex(void* arg) {
// the two functions. The on-stack ButexBthreadWaiter is safe to use and
// bw->waiter_state will not change again.
// unsleep_if_necessary(bw, get_global_timer_thread());
- tls_task_group->ready_to_run(bw->tid);
+ tls_task_group->ready_to_run(bw->task_meta);
// FIXME: jump back to original thread is buggy.
// // Value unmatched or waiter is already woken up by TimerThread, jump
@@ -697,7 +700,7 @@ int butex_wait(void* arg, int expected_value, const
timespec* abstime, bool prep
// release fence matches with acquire fence in
interrupt_and_consume_waiters
// in task_group.cpp to guarantee visibility of `interrupted'.
bbw.task_meta->current_waiter.store(&bbw, butil::memory_order_release);
- WaitForButexArgs args{ &bbw, prepend};
+ WaitForButexArgs args{ &bbw, prepend };
g->set_remained(wait_for_butex, &args);
TaskGroup::sched(&g);
diff --git a/src/bthread/execution_queue.cpp b/src/bthread/execution_queue.cpp
index 557669ee..bb01882c 100644
--- a/src/bthread/execution_queue.cpp
+++ b/src/bthread/execution_queue.cpp
@@ -68,7 +68,7 @@ inline ExecutionQueueVars* get_execq_vars() {
void ExecutionQueueBase::start_execute(TaskNode* node) {
node->next = TaskNode::UNCONNECTED;
- node->status = UNEXECUTED;
+ node->status = TaskNode::UNEXECUTED;
node->iterated = false;
if (node->high_priority) {
// Add _high_priority_tasks before pushing this task into queue to
@@ -304,7 +304,7 @@ int ExecutionQueueBase::_execute(TaskNode* head, bool
high_priority, int* nitera
if (head != NULL && head->stop_task) {
CHECK(head->next == NULL);
head->iterated = true;
- head->status = EXECUTED;
+ head->status = TaskNode::EXECUTED;
TaskIteratorBase iter(NULL, this, true, false);
_execute_func(_meta, _type_specific_function, iter);
if (niterated) {
diff --git a/src/bthread/execution_queue_inl.h
b/src/bthread/execution_queue_inl.h
index 64725051..f5998a26 100644
--- a/src/bthread/execution_queue_inl.h
+++ b/src/bthread/execution_queue_inl.h
@@ -38,17 +38,17 @@ struct ExecutionQueueId {
uint64_t value;
};
-enum TaskStatus {
- UNEXECUTED = 0,
- EXECUTING = 1,
- EXECUTED = 2
-};
-
struct TaskNode;
class ExecutionQueueBase;
typedef void (*clear_task_mem)(TaskNode*);
struct BAIDU_CACHELINE_ALIGNMENT TaskNode {
+ enum TaskStatus {
+ UNEXECUTED = 0,
+ EXECUTING = 1,
+ EXECUTED = 2
+ };
+
TaskNode()
: version(0)
, status(UNEXECUTED)
diff --git a/src/bthread/task_control.cpp b/src/bthread/task_control.cpp
index d0549ea9..55ed1f2e 100644
--- a/src/bthread/task_control.cpp
+++ b/src/bthread/task_control.cpp
@@ -19,6 +19,7 @@
// Date: Tue Jul 10 17:40:58 CST 2012
+#include <sys/syscall.h> // SYS_gettid
#include "butil/scoped_lock.h" // BAIDU_SCOPED_LOCK
#include "butil/errno.h" // berror
#include "butil/logging.h"
@@ -88,14 +89,19 @@ void* TaskControl::worker_thread(void* arg) {
LOG(ERROR) << "Fail to create TaskGroup in pthread=" << pthread_self();
return NULL;
}
+
+ g->_tid = syscall(SYS_gettid);
+
std::string worker_thread_name = butil::string_printf(
- "brpc_wkr:%d-%d", g->tag(), c->_next_worker_id.fetch_add(1,
butil::memory_order_relaxed));
+ "brpc_wkr:%d-%d", g->tag(),
+ c->_next_worker_id.fetch_add(1, butil::memory_order_relaxed));
butil::PlatformThread::SetName(worker_thread_name.c_str());
- BT_VLOG << "Created worker=" << pthread_self() << " bthread=" <<
g->main_tid()
- << " tag=" << g->tag();
+ BT_VLOG << "Created worker=" << pthread_self() << " tid=" << g->_tid
+ << " bthread=" << g->main_tid() << " tag=" << g->tag();
tls_task_group = g;
c->_nworkers << 1;
c->tag_nworkers(g->tag()) << 1;
+
g->run_main_task();
stat = g->main_stat();
@@ -208,6 +214,13 @@ int TaskControl::init(int concurrency) {
LOG(ERROR) << "Fail to get global_timer_thread";
return -1;
}
+
+#ifdef BRPC_BTHREAD_TRACER
+ if (!_task_tracer.Init()) {
+ LOG(ERROR) << "Fail to init TaskTracer";
+ return -1;
+ }
+#endif // BRPC_BTHREAD_TRACER
_workers.resize(_concurrency);
for (int i = 0; i < _concurrency; ++i) {
@@ -281,6 +294,16 @@ TaskGroup* TaskControl::choose_one_group(bthread_tag_t
tag) {
return NULL;
}
+#ifdef BRPC_BTHREAD_TRACER
+void TaskControl::stack_trace(std::ostream& os, bthread_t tid) {
+ _task_tracer.Trace(os, tid);
+}
+
+std::string TaskControl::stack_trace(bthread_t tid) {
+ return _task_tracer.Trace(tid);
+}
+#endif // BRPC_BTHREAD_TRACER
+
extern int stop_and_join_epoll_threads();
void TaskControl::stop_and_join() {
@@ -301,13 +324,19 @@ void TaskControl::stop_and_join() {
pl.stop();
}
}
- // Interrupt blocking operations.
- for (size_t i = 0; i < _workers.size(); ++i) {
- interrupt_pthread(_workers[i]);
+
+ for (auto worker: _workers) {
+ // Interrupt blocking operations.
+#ifdef BRPC_BTHREAD_TRACER
+ // TaskTracer has registered signal handler for SIGURG.
+ pthread_kill(worker, SIGURG);
+#else
+ interrupt_pthread(worker);
+#endif // BRPC_BTHREAD_TRACER
}
// Join workers
- for (size_t i = 0; i < _workers.size(); ++i) {
- pthread_join(_workers[i], NULL);
+ for (auto worker : _workers) {
+ pthread_join(worker, NULL);
}
}
diff --git a/src/bthread/task_control.h b/src/bthread/task_control.h
index 12598079..95820a86 100644
--- a/src/bthread/task_control.h
+++ b/src/bthread/task_control.h
@@ -25,14 +25,15 @@
#ifndef NDEBUG
#include <iostream> // std::ostream
#endif
+#include <signal.h>
#include <stddef.h> // size_t
#include <vector>
#include <array>
#include <memory>
#include "butil/atomicops.h" // butil::atomic
#include "bvar/bvar.h" // bvar::PassiveStatus
+#include "bthread/task_tracer.h"
#include "bthread/task_meta.h" // TaskMeta
-#include "butil/resource_pool.h" // ResourcePool
#include "bthread/work_stealing_queue.h" // WorkStealingQueue
#include "bthread/parking_lot.h"
@@ -43,7 +44,11 @@ class TaskGroup;
// Control all task groups
class TaskControl {
- friend class TaskGroup;
+friend class TaskGroup;
+friend void wait_for_butex(void*);
+#ifdef BRPC_BTHREAD_TRACER
+friend bthread_t init_for_pthread_stack_trace();
+#endif // BRPC_BTHREAD_TRACER
public:
TaskControl();
@@ -86,6 +91,12 @@ public:
// If this method is called after init(), it never returns NULL.
TaskGroup* choose_one_group(bthread_tag_t tag);
+#ifdef BRPC_BTHREAD_TRACER
+ // A stack trace of a bthread can be helpful in debugging.
+ void stack_trace(std::ostream& os, bthread_t tid);
+ std::string stack_trace(bthread_t tid);
+#endif // BRPC_BTHREAD_TRACER
+
private:
typedef std::array<TaskGroup*, BTHREAD_MAX_CONCURRENCY> TaggedGroups;
static const int PARKING_LOT_NUM = 4;
@@ -144,6 +155,11 @@ private:
std::vector<bvar::Adder<int64_t>*> _tagged_nbthreads;
std::vector<TaggedParkingLot> _pl;
+
+#ifdef BRPC_BTHREAD_TRACER
+ TaskTracer _task_tracer;
+#endif // BRPC_BTHREAD_TRACER
+
};
inline bvar::LatencyRecorder& TaskControl::exposed_pending_time() {
diff --git a/src/bthread/task_group.cpp b/src/bthread/task_group.cpp
index f922050e..170b2730 100644
--- a/src/bthread/task_group.cpp
+++ b/src/bthread/task_group.cpp
@@ -35,7 +35,6 @@
#include "bthread/task_control.h"
#include "bthread/task_group.h"
#include "bthread/timer_thread.h"
-#include "bthread/errno.h"
namespace bthread {
@@ -45,8 +44,7 @@ static const bthread_attr_t BTHREAD_ATTR_TASKGROUP = {
static bool pass_bool(const char*, bool) { return true; }
DEFINE_bool(show_bthread_creation_in_vars, false, "When this flags is on, The
time "
- "from bthread creation to first run will be recorded and shown "
- "in /vars");
+ "from bthread creation to first run will be recorded and shown in
/vars");
const bool ALLOW_UNUSED dummy_show_bthread_creation_in_vars =
::GFLAGS_NS::RegisterFlagValidator(&FLAGS_show_bthread_creation_in_vars,
pass_bool);
@@ -188,8 +186,7 @@ void TaskGroup::run_main_task() {
}
TaskGroup::TaskGroup(TaskControl* c)
- :
- _cur_meta(NULL)
+ : _cur_meta(NULL)
, _control(c)
, _num_nosignal(0)
, _nsignaled(0)
@@ -207,7 +204,7 @@ TaskGroup::TaskGroup(TaskControl* c)
, _sched_recursive_guard(0)
#endif
, _tag(BTHREAD_TAG_DEFAULT)
-{
+ , _tid(-1) {
_steal_seed = butil::fast_rand();
_steal_offset = OFFSET_TABLE[_steal_seed % ARRAY_SIZE(OFFSET_TABLE)];
CHECK(c);
@@ -268,6 +265,9 @@ void TaskGroup::task_runner(intptr_t skip_remained) {
// NOTE: tls_task_group is volatile since tasks are moved around
// different groups.
TaskGroup* g = tls_task_group;
+#ifdef BRPC_BTHREAD_TRACER
+ TaskTracer::set_running_status(g->tid(), g->_cur_meta);
+#endif // BRPC_BTHREAD_TRACER
if (!skip_remained) {
while (g->_last_context_remained) {
@@ -343,14 +343,28 @@ void TaskGroup::task_runner(intptr_t skip_remained) {
// is 0, change it to 1 to make bthread_t never be 0. Any access
// or join to the bthread after changing version will be rejected.
// The spinlock is for visibility of TaskGroup::get_attr.
+#ifdef BRPC_BTHREAD_TRACER
+ bool tracing = false;
+#endif // BRPC_BTHREAD_TRACER
{
BAIDU_SCOPED_LOCK(m->version_lock);
+#ifdef BRPC_BTHREAD_TRACER
+ tracing = TaskTracer::set_end_status_unsafe(m);
+#endif // BRPC_BTHREAD_TRACER
if (0 == ++*m->version_butex) {
++*m->version_butex;
}
}
butex_wake_except(m->version_butex, 0);
+#ifdef BRPC_BTHREAD_TRACER
+ if (tracing) {
+ // Wait for tracing completion.
+ g->_control->_task_tracer.WaitForTracing(m);
+ }
+ g->_control->_task_tracer.set_status(TASK_STATUS_UNKNOWN, m);
+#endif // BRPC_BTHREAD_TRACER
+
g->_control->_nbthreads << -1;
g->_control->tag_nbthreads(g->tag()) << -1;
g->set_remained(TaskGroup::_release_last_context, m);
@@ -385,7 +399,7 @@ int TaskGroup::start_foreground(TaskGroup** pg,
const bthread_attr_t using_attr = (attr ? *attr : BTHREAD_ATTR_NORMAL);
butil::ResourceId<TaskMeta> slot;
TaskMeta* m = butil::get_resource(&slot);
- if (__builtin_expect(!m, 0)) {
+ if (BAIDU_UNLIKELY(NULL == m)) {
return ENOMEM;
}
CHECK(m->current_waiter.load(butil::memory_order_relaxed) == NULL);
@@ -412,9 +426,12 @@ int TaskGroup::start_foreground(TaskGroup** pg,
TaskGroup* g = *pg;
g->_control->_nbthreads << 1;
g->_control->tag_nbthreads(g->tag()) << 1;
+#ifdef BRPC_BTHREAD_TRACER
+ g->_control->_task_tracer.set_status(TASK_STATUS_CREATED, m);
+#endif // BRPC_BTHREAD_TRACER
if (g->is_current_pthread_task()) {
// never create foreground task in pthread.
- g->ready_to_run(m->tid, (using_attr.flags & BTHREAD_NOSIGNAL));
+ g->ready_to_run(m, using_attr.flags & BTHREAD_NOSIGNAL);
} else {
// NOSIGNAL affects current task, not the new task.
RemainedFn fn = NULL;
@@ -423,10 +440,7 @@ int TaskGroup::start_foreground(TaskGroup** pg,
} else {
fn = ready_to_run_in_worker;
}
- ReadyToRunArgs args = {
- g->current_tid(),
- (bool)(using_attr.flags & BTHREAD_NOSIGNAL)
- };
+ ReadyToRunArgs args = { g->_cur_meta, (bool)(using_attr.flags &
BTHREAD_NOSIGNAL) };
g->set_remained(fn, &args);
TaskGroup::sched_to(pg, m->tid);
}
@@ -445,7 +459,7 @@ int TaskGroup::start_background(bthread_t* __restrict th,
const bthread_attr_t using_attr = (attr ? *attr : BTHREAD_ATTR_NORMAL);
butil::ResourceId<TaskMeta> slot;
TaskMeta* m = butil::get_resource(&slot);
- if (__builtin_expect(!m, 0)) {
+ if (BAIDU_UNLIKELY(NULL == m)) {
return ENOMEM;
}
CHECK(m->current_waiter.load(butil::memory_order_relaxed) == NULL);
@@ -470,10 +484,13 @@ int TaskGroup::start_background(bthread_t* __restrict th,
}
_control->_nbthreads << 1;
_control->tag_nbthreads(tag()) << 1;
+#ifdef BRPC_BTHREAD_TRACER
+ _control->_task_tracer.set_status(TASK_STATUS_CREATED, m);
+#endif // BRPC_BTHREAD_TRACER
if (REMOTE) {
- ready_to_run_remote(m->tid, (using_attr.flags & BTHREAD_NOSIGNAL));
+ ready_to_run_remote(m, (using_attr.flags & BTHREAD_NOSIGNAL));
} else {
- ready_to_run(m->tid, (using_attr.flags & BTHREAD_NOSIGNAL));
+ ready_to_run(m, (using_attr.flags & BTHREAD_NOSIGNAL));
}
return 0;
}
@@ -495,7 +512,7 @@ int TaskGroup::join(bthread_t tid, void** return_value) {
return EINVAL;
}
TaskMeta* m = address_meta(tid);
- if (__builtin_expect(!m, 0)) {
+ if (BAIDU_UNLIKELY(NULL == m)) {
// The bthread is not created yet, this join is definitely wrong.
return EINVAL;
}
@@ -642,9 +659,16 @@ void TaskGroup::sched_to(TaskGroup** pg, TaskMeta*
next_meta) {
if (cur_meta->stack != NULL) {
if (next_meta->stack != cur_meta->stack) {
CheckBthreadScheSafety();
+#ifdef BRPC_BTHREAD_TRACER
+ g->_control->_task_tracer.set_status(TASK_STATUS_JUMPING,
cur_meta);
+ g->_control->_task_tracer.set_status(TASK_STATUS_JUMPING,
next_meta);
+#endif // BRPC_BTHREAD_TRACER
jump_stack(cur_meta->stack, next_meta->stack);
// probably went to another group, need to assign g again.
g = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_task_group);
+#ifdef BRPC_BTHREAD_TRACER
+ TaskTracer::set_running_status(g->tid(), g->_cur_meta);
+#endif // BRPC_BTHREAD_TRACER
}
#ifndef NDEBUG
else {
@@ -653,8 +677,13 @@ void TaskGroup::sched_to(TaskGroup** pg, TaskMeta*
next_meta) {
CHECK(cur_meta->stack == g->_main_stack);
}
#endif
+ } /* else because of ending_sched(including
pthread_task->pthread_task). */
+#ifdef BRPC_BTHREAD_TRACER
+ else {
+ // _cur_meta: TASK_STATUS_FIRST_READY -> TASK_STATUS_RUNNING.
+ TaskTracer::set_running_status(g->tid(), g->_cur_meta);
}
- // else because of ending_sched(including pthread_task->pthread_task)
+#endif // BRPC_BTHREAD_TRACER
} else {
LOG(FATAL) << "bthread=" << g->current_tid() << " sched_to itself!";
}
@@ -686,8 +715,12 @@ void TaskGroup::destroy_self() {
}
}
-void TaskGroup::ready_to_run(bthread_t tid, bool nosignal) {
- push_rq(tid);
+
+void TaskGroup::ready_to_run(TaskMeta* meta, bool nosignal) {
+#ifdef BRPC_BTHREAD_TRACER
+ _control->_task_tracer.set_status(TASK_STATUS_READY, meta);
+#endif // BRPC_BTHREAD_TRACER
+ push_rq(meta->tid);
if (nosignal) {
++_num_nosignal;
} else {
@@ -707,9 +740,12 @@ void TaskGroup::flush_nosignal_tasks() {
}
}
-void TaskGroup::ready_to_run_remote(bthread_t tid, bool nosignal) {
+void TaskGroup::ready_to_run_remote(TaskMeta* meta, bool nosignal) {
+#ifdef BRPC_BTHREAD_TRACER
+ _control->_task_tracer.set_status(TASK_STATUS_READY, meta);
+#endif // BRPC_BTHREAD_TRACER
_remote_rq._mutex.lock();
- while (!_remote_rq.push_locked(tid)) {
+ while (!_remote_rq.push_locked(meta->tid)) {
flush_nosignal_tasks_remote_locked(_remote_rq._mutex);
LOG_EVERY_SECOND(ERROR) << "_remote_rq is full, capacity="
<< _remote_rq.capacity();
@@ -740,11 +776,11 @@ void
TaskGroup::flush_nosignal_tasks_remote_locked(butil::Mutex& locked_mutex) {
_control->signal_task(val, _tag);
}
-void TaskGroup::ready_to_run_general(bthread_t tid, bool nosignal) {
+void TaskGroup::ready_to_run_general(TaskMeta* meta, bool nosignal) {
if (tls_task_group == this) {
- return ready_to_run(tid, nosignal);
+ return ready_to_run(meta, nosignal);
}
- return ready_to_run_remote(tid, nosignal);
+ return ready_to_run_remote(meta, nosignal);
}
void TaskGroup::flush_nosignal_tasks_general() {
@@ -756,12 +792,16 @@ void TaskGroup::flush_nosignal_tasks_general() {
void TaskGroup::ready_to_run_in_worker(void* args_in) {
ReadyToRunArgs* args = static_cast<ReadyToRunArgs*>(args_in);
- return tls_task_group->ready_to_run(args->tid, args->nosignal);
+ return tls_task_group->ready_to_run(args->meta, args->nosignal);
}
void TaskGroup::ready_to_run_in_worker_ignoresignal(void* args_in) {
ReadyToRunArgs* args = static_cast<ReadyToRunArgs*>(args_in);
- return tls_task_group->push_rq(args->tid);
+#ifdef BRPC_BTHREAD_TRACER
+ tls_task_group->_control->_task_tracer.set_status(
+ TASK_STATUS_READY, args->meta);
+#endif // BRPC_BTHREAD_TRACER
+ return tls_task_group->push_rq(args->meta->tid);
}
struct SleepArgs {
@@ -776,7 +816,7 @@ static void ready_to_run_from_timer_thread(void* arg) {
const SleepArgs* e = static_cast<const SleepArgs*>(arg);
auto g = e->group;
auto tag = g->tag();
- g->control()->choose_one_group(tag)->ready_to_run_remote(e->tid);
+ g->control()->choose_one_group(tag)->ready_to_run_remote(e->meta);
}
void TaskGroup::_add_sleep_event(void* void_args) {
@@ -785,6 +825,9 @@ void TaskGroup::_add_sleep_event(void* void_args) {
// will be gone.
SleepArgs e = *static_cast<SleepArgs*>(void_args);
TaskGroup* g = e.group;
+#ifdef BRPC_BTHREAD_TRACER
+ g->_control->_task_tracer.set_status(TASK_STATUS_SUSPENDED, e.meta);
+#endif // BRPC_BTHREAD_TRACER
TimerThread::TaskId sleep_id;
sleep_id = get_global_timer_thread()->schedule(
@@ -794,7 +837,7 @@ void TaskGroup::_add_sleep_event(void* void_args) {
if (!sleep_id) {
e.meta->sleep_failed = true;
// Fail to schedule timer, go back to previous thread.
- g->ready_to_run(e.tid);
+ g->ready_to_run(e.meta);
return;
}
@@ -818,7 +861,7 @@ void TaskGroup::_add_sleep_event(void* void_args) {
// schedule previous thread as well. If sleep_id does not exist,
// previous thread is scheduled by timer thread before and we don't
// have to do it again.
- g->ready_to_run(e.tid);
+ g->ready_to_run(e.meta);
}
}
@@ -921,12 +964,12 @@ int TaskGroup::interrupt(bthread_t tid, TaskControl* c,
bthread_tag_t tag) {
if (get_global_timer_thread()->unschedule(sleep_id) == 0) {
bthread::TaskGroup* g = bthread::tls_task_group;
if (g) {
- g->ready_to_run(tid);
+ g->ready_to_run(TaskGroup::address_meta(tid));
} else {
if (!c) {
return EINVAL;
}
- c->choose_one_group(tag)->ready_to_run_remote(tid);
+
c->choose_one_group(tag)->ready_to_run_remote(TaskGroup::address_meta(tid));
}
}
}
@@ -935,7 +978,7 @@ int TaskGroup::interrupt(bthread_t tid, TaskControl* c,
bthread_tag_t tag) {
void TaskGroup::yield(TaskGroup** pg) {
TaskGroup* g = *pg;
- ReadyToRunArgs args = { g->current_tid(), false };
+ ReadyToRunArgs args = { g->_cur_meta, false };
g->set_remained(ready_to_run_in_worker, &args);
sched(pg);
}
@@ -957,6 +1000,9 @@ void print_task(std::ostream& os, bthread_t tid) {
bool has_tls = false;
int64_t cpuwide_start_ns = 0;
TaskStatistics stat = {0, 0};
+ TaskStatus status = TASK_STATUS_UNKNOWN;
+ bool traced = false;
+ pid_t worker_tid = 0;
{
BAIDU_SCOPED_LOCK(m->version_lock);
if (given_ver == *m->version_butex) {
@@ -970,6 +1016,9 @@ void print_task(std::ostream& os, bthread_t tid) {
has_tls = m->local_storage.keytable;
cpuwide_start_ns = m->cpuwide_start_ns;
stat = m->stat;
+ status = m->status;
+ traced = m->traced;
+ worker_tid = m->worker_tid;
}
}
if (!matched) {
@@ -986,7 +1035,15 @@ void print_task(std::ostream& os, bthread_t tid) {
<< "}\nhas_tls=" << has_tls
<< "\nuptime_ns=" << butil::cpuwide_time_ns() - cpuwide_start_ns
<< "\ncputime_ns=" << stat.cputime_ns
- << "\nnswitch=" << stat.nswitch;
+ << "\nnswitch=" << stat.nswitch
+#ifdef BRPC_BTHREAD_TRACER
+ << "\nstatus=" << status
+ << "\ntraced=" << traced
+ << "\nworker_tid=" << worker_tid;
+#else
+ ;
+ (void)status;(void)traced;(void)worker_tid;
+#endif // BRPC_BTHREAD_TRACER
}
}
diff --git a/src/bthread/task_group.h b/src/bthread/task_group.h
index 7c197c01..c3d2ae46 100644
--- a/src/bthread/task_group.h
+++ b/src/bthread/task_group.h
@@ -83,7 +83,7 @@ public:
// then being popped by sched(pg), which is not necessary.
static void sched_to(TaskGroup** pg, TaskMeta* next_meta);
static void sched_to(TaskGroup** pg, bthread_t next_tid);
- static void exchange(TaskGroup** pg, bthread_t next_tid);
+ static void exchange(TaskGroup** pg, TaskMeta* next_meta);
// The callback will be run in the beginning of next-run bthread.
// Can't be called by current bthread directly because it often needs
@@ -151,18 +151,18 @@ public:
int64_t cumulated_cputime_ns() const { return _cumulated_cputime_ns; }
// Push a bthread into the runqueue
- void ready_to_run(bthread_t tid, bool nosignal = false);
+ void ready_to_run(TaskMeta* meta, bool nosignal = false);
// Flush tasks pushed to rq but signalled.
void flush_nosignal_tasks();
// Push a bthread into the runqueue from another non-worker thread.
- void ready_to_run_remote(bthread_t tid, bool nosignal = false);
+ void ready_to_run_remote(TaskMeta* meta, bool nosignal = false);
void flush_nosignal_tasks_remote_locked(butil::Mutex& locked_mutex);
void flush_nosignal_tasks_remote();
// Automatically decide the caller is remote or local, and call
// the corresponding function.
- void ready_to_run_general(bthread_t tid, bool nosignal = false);
+ void ready_to_run_general(TaskMeta* meta, bool nosignal = false);
void flush_nosignal_tasks_general();
// The TaskControl that this TaskGroup belongs to.
@@ -189,6 +189,8 @@ public:
bthread_tag_t tag() const { return _tag; }
+ pid_t tid() const { return _tid; }
+
int64_t current_task_cpu_clock_ns() {
if (_last_cpu_clock_ns == 0) {
return 0;
@@ -206,7 +208,7 @@ friend class TaskControl;
int init(size_t runqueue_capacity);
- // You shall call destroy_self() instead of destructor because deletion
+ // You shall call destroy_self() instead of destructor because deletion
// of groups are postponed to avoid race.
~TaskGroup();
@@ -216,7 +218,7 @@ friend class TaskControl;
static void _release_last_context(void*);
static void _add_sleep_event(void*);
struct ReadyToRunArgs {
- bthread_t tid;
+ TaskMeta* meta;
bool nosignal;
};
static void ready_to_run_in_worker(void*);
@@ -273,6 +275,9 @@ friend class TaskControl;
int _sched_recursive_guard;
// tag of this taskgroup
bthread_tag_t _tag;
+
+ // Worker thread id.
+ pid_t _tid;
};
} // namespace bthread
diff --git a/src/bthread/task_group_inl.h b/src/bthread/task_group_inl.h
index 45626ceb..75c377e1 100644
--- a/src/bthread/task_group_inl.h
+++ b/src/bthread/task_group_inl.h
@@ -46,17 +46,17 @@ inline TaskMeta* TaskGroup::address_meta(bthread_t tid) {
return address_resource(get_slot(tid));
}
-inline void TaskGroup::exchange(TaskGroup** pg, bthread_t next_tid) {
+inline void TaskGroup::exchange(TaskGroup** pg, TaskMeta* next_meta) {
TaskGroup* g = *pg;
if (g->is_current_pthread_task()) {
- return g->ready_to_run(next_tid);
+ return g->ready_to_run(next_meta);
}
- ReadyToRunArgs args = { g->current_tid(), false };
+ ReadyToRunArgs args = { g->_cur_meta, false };
g->set_remained((g->current_task()->about_to_quit
? ready_to_run_in_worker_ignoresignal
: ready_to_run_in_worker),
&args);
- TaskGroup::sched_to(pg, next_tid);
+ TaskGroup::sched_to(pg, next_meta);
}
inline void TaskGroup::sched_to(TaskGroup** pg, bthread_t next_tid) {
diff --git a/src/bthread/task_meta.h b/src/bthread/task_meta.h
index 3d901633..34d86632 100644
--- a/src/bthread/task_meta.h
+++ b/src/bthread/task_meta.h
@@ -27,6 +27,7 @@
#include "butil/atomicops.h" // butil::atomic
#include "bthread/types.h" // bthread_attr_t
#include "bthread/stack.h" // ContextualStack
+#include "bthread/timer_thread.h"
namespace bthread {
@@ -49,59 +50,75 @@ struct LocalStorage {
const static LocalStorage LOCAL_STORAGE_INIT =
BTHREAD_LOCAL_STORAGE_INITIALIZER;
+enum TaskStatus {
+ TASK_STATUS_UNKNOWN,
+ TASK_STATUS_CREATED,
+ TASK_STATUS_FIRST_READY,
+ TASK_STATUS_READY,
+ TASK_STATUS_JUMPING,
+ TASK_STATUS_RUNNING,
+ TASK_STATUS_SUSPENDED,
+ TASK_STATUS_END,
+};
+
struct TaskMeta {
// [Not Reset]
- butil::atomic<ButexWaiter*> current_waiter;
- uint64_t current_sleep;
+ butil::atomic<ButexWaiter*> current_waiter{NULL};
+ uint64_t current_sleep{TimerThread::INVALID_TASK_ID};
// A flag to mark if the Timer scheduling failed.
- bool sleep_failed;
+ bool sleep_failed{false};
// A builtin flag to mark if the thread is stopping.
- bool stop;
+ bool stop{false};
// The thread is interrupted and should wake up from some blocking ops.
- bool interrupted;
+ bool interrupted{false};
// Scheduling of the thread can be delayed.
- bool about_to_quit;
+ bool about_to_quit{false};
// [Not Reset] guarantee visibility of version_butex.
- pthread_spinlock_t version_lock;
+ pthread_spinlock_t version_lock{};
// [Not Reset] only modified by one bthread at any time, no need to be atomic
- uint32_t* version_butex;
+ uint32_t* version_butex{NULL};
// The identifier. It does not have to be here, however many code is
// simplified if they can get tid from TaskMeta.
- bthread_t tid;
+ bthread_t tid{INVALID_BTHREAD};
// User function and argument
- void* (*fn)(void*);
- void* arg;
+ void* (*fn)(void*){NULL};
+ void* arg{NULL};
// Stack of this task.
- ContextualStack* stack;
+ ContextualStack* stack{NULL};
// Attributes creating this task
- bthread_attr_t attr;
+ bthread_attr_t attr{BTHREAD_ATTR_NORMAL};
// Statistics
- int64_t cpuwide_start_ns;
- TaskStatistics stat;
+ int64_t cpuwide_start_ns{0};
+ TaskStatistics stat{};
// bthread local storage, sync with tls_bls (defined in task_group.cpp)
// when the bthread is created or destroyed.
// DO NOT use this field directly, use tls_bls instead.
- LocalStorage local_storage;
+ LocalStorage local_storage{};
+
+ // Only used when TaskTracer is enabled.
+ // Bthread status.
+ TaskStatus status{TASK_STATUS_UNKNOWN};
+ // Whether bthread is traced?
+ bool traced{false};
+ // Worker thread id.
+ pid_t worker_tid{-1};
public:
// Only initialize [Not Reset] fields, other fields will be reset in
// bthread_start* functions
- TaskMeta()
- : current_waiter(NULL)
- , current_sleep(0)
- , stack(NULL) {
+ TaskMeta() {
pthread_spin_init(&version_lock, 0);
version_butex = butex_create_checked<uint32_t>();
*version_butex = 1;
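
The TaskStatus values above are reported by the builtin /bthreads page as raw integers (see the print_task() change earlier in this patch). Below is a minimal, illustrative helper for rendering them as labels; it assumes only the enum added in src/bthread/task_meta.h, and the helper name status_to_string and the label strings are not part of the patch.

// Illustrative sketch, not part of the patch.
#include "bthread/task_meta.h"

inline const char* status_to_string(bthread::TaskStatus s) {
    switch (s) {
    case bthread::TASK_STATUS_CREATED:     return "created";
    case bthread::TASK_STATUS_FIRST_READY: return "first_ready";
    case bthread::TASK_STATUS_READY:       return "ready";
    case bthread::TASK_STATUS_JUMPING:     return "jumping";
    case bthread::TASK_STATUS_RUNNING:     return "running";
    case bthread::TASK_STATUS_SUSPENDED:   return "suspended";
    case bthread::TASK_STATUS_END:         return "end";
    default:                               return "unknown";
    }
}
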
diff --git a/src/bthread/task_tracer.cpp b/src/bthread/task_tracer.cpp
new file mode 100644
index 00000000..acdf9208
--- /dev/null
+++ b/src/bthread/task_tracer.cpp
@@ -0,0 +1,569 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifdef BRPC_BTHREAD_TRACER
+
+#include "bthread/task_tracer.h"
+#include <unistd.h>
+#include <poll.h>
+#include <gflags/gflags.h>
+#include "butil/debug/stack_trace.h"
+#include "butil/memory/scope_guard.h"
+#include "butil/reloadable_flags.h"
+#include "bthread/task_group.h"
+#include "bthread/processor.h"
+
+namespace bthread {
+
+DEFINE_bool(enable_fast_unwind, true, "Whether to enable fast unwind");
+DEFINE_uint32(signal_trace_timeout_ms, 50, "Timeout for signal trace in ms");
+BUTIL_VALIDATE_GFLAG(signal_trace_timeout_ms, butil::PositiveInteger<uint32_t>);
+
+extern BAIDU_THREAD_LOCAL TaskMeta* pthread_fake_meta;
+
+TaskTracer::SignalSync::~SignalSync() {
+ if (_pipe_init) {
+ close(pipe_fds[0]);
+ close(pipe_fds[1]);
+ }
+
+ if (_sem_init) {
+ sem_destroy(&sem);
+ }
+}
+
+bool TaskTracer::SignalSync::Init() {
+ if (pipe(pipe_fds) != 0) {
+ PLOG(ERROR) << "Fail to pipe";
+ return false;
+ }
+ if (butil::make_non_blocking(pipe_fds[0]) != 0) {
+ PLOG(ERROR) << "Fail to make_non_blocking";
+ return false;
+ }
+ if (butil::make_non_blocking(pipe_fds[1]) != 0) {
+ PLOG(ERROR) << "Fail to make_non_blocking";
+ return false;
+ }
+ _pipe_init = true;
+
+ if (sem_init(&sem, 0, 0) != 0) {
+ PLOG(ERROR) << "Fail to sem_init";
+ return false;
+ }
+ _sem_init = true;
+
+ return true;
+}
+
+std::string TaskTracer::Result::OutputToString() {
+ std::string str;
+ if (err_count > 0 || frame_count > 0) {
+ str.reserve(1024);
+ }
+ if (frame_count > 0) {
+ if (fast_unwind) {
+ butil::debug::StackTrace stack_trace((void**)&ips, frame_count);
+ stack_trace.OutputToString(str);
+ } else {
+ for (size_t i = 0; i < frame_count; ++i) {
+ butil::string_appendf(&str, "#%zu 0x%016lx ", i, ips[i]);
+ if (mangled[i][0] == '\0') {
+ str.append("<unknown>");
+ } else {
+ str.append(butil::demangle(mangled[i]));
+ }
+ if (i + 1 < frame_count) {
+ str.push_back('\n');
+ }
+ }
+ }
+ } else {
+ str.append("No frame");
+ }
+
+ if (err_count > 0) {
+ str.append("\nError message:\n");
+ }
+ for (size_t i = 0; i < err_count; ++i) {
+ str.append(err_msg[i]);
+ if (i + 1 < err_count) {
+ str.push_back('\n');
+ }
+ }
+
+ return str;
+}
+
+void TaskTracer::Result::OutputToStream(std::ostream& os) {
+ if (frame_count > 0) {
+ if (fast_unwind) {
+ butil::debug::StackTrace stack_trace((void**)&ips, frame_count);
+ stack_trace.OutputToStream(&os);
+ } else {
+ for (size_t i = 0; i < frame_count; ++i) {
+ os << "# " << i << " 0x" << std::hex << ips[i] << std::dec << " ";
+ if (mangled[i][0] == '\0') {
+ os << "<unknown>";
+ } else {
+ os << butil::demangle(mangled[i]);
+ }
+ if (i + 1 < frame_count) {
+ os << '\n';
+ }
+ }
+ }
+ } else {
+ os << "No frame";
+ }
+
+ if (err_count == 0) {
+ return;
+ }
+
+ os << "\nError message:\n";
+ for (size_t i = 0; i < err_count; ++i) {
+ os << err_msg[i];
+ if (i + 1 < err_count) {
+ os << '\n';
+ }
+ }
+}
+
+bool TaskTracer::Init() {
+ if (_trace_time.expose("bthread_trace_time") != 0) {
+ return false;
+ }
+ if (!RegisterSignalHandler()) {
+ return false;
+ }
+ // Warm up libunwind.
+ unw_cursor_t cursor;
+ if (unw_getcontext(&_context) == 0 && unw_init_local(&cursor, &_context) == 0) {
+ butil::ignore_result(TraceCore(cursor));
+ }
+ return true;
+}
+
+void TaskTracer::set_status(TaskStatus s, TaskMeta* m) {
+ CHECK_NE(TASK_STATUS_RUNNING, s) << "Use `set_running_status' instead";
+ CHECK_NE(TASK_STATUS_END, s) << "Use `set_end_status_unsafe' instead";
+
+ bool tracing;
+ {
+ BAIDU_SCOPED_LOCK(m->version_lock);
+ if (TASK_STATUS_UNKNOWN == m->status && TASK_STATUS_JUMPING == s) {
+ // Do not update status for jumping when bthread is ending.
+ return;
+ }
+
+ tracing = m->traced;
+ // bthread is scheduled for the first time.
+ if (TASK_STATUS_READY == s || NULL == m->stack) {
+ m->status = TASK_STATUS_FIRST_READY;
+ } else {
+ m->status = s;
+ }
+ if (TASK_STATUS_CREATED == s) {
+ m->worker_tid = -1;
+ }
+ }
+
+ // Make sure bthread does not jump stack when it is being traced.
+ if (tracing && TASK_STATUS_JUMPING == s) {
+ WaitForTracing(m);
+ }
+}
+
+void TaskTracer::set_running_status(pid_t worker_tid, TaskMeta* m) {
+ BAIDU_SCOPED_LOCK(m->version_lock);
+ m->worker_tid = worker_tid;
+ m->status = TASK_STATUS_RUNNING;
+}
+
+bool TaskTracer::set_end_status_unsafe(TaskMeta* m) {
+ m->status = TASK_STATUS_END;
+ return m->traced;
+}
+
+std::string TaskTracer::Trace(bthread_t tid) {
+ return TraceImpl(tid).OutputToString();
+}
+
+void TaskTracer::Trace(std::ostream& os, bthread_t tid) {
+ TraceImpl(tid).OutputToStream(os);
+}
+
+void TaskTracer::WaitForTracing(TaskMeta* m) {
+ BAIDU_SCOPED_LOCK(_mutex);
+ while (m->traced) {
+ _cond.Wait();
+ }
+}
+
+TaskStatus TaskTracer::WaitForJumping(TaskMeta* m) {
+ // Reasons for not using locks here:
+ // 1. It is necessary to lock before jump_stack, unlock after jump_stack,
+ // which involves two different bthread and is prone to errors.
+ // 2. jump_stack is fast.
+ int i = 0;
+ do {
+ // The bthread is jumping now, spin until it finishes.
+ if (i++ < 30) {
+ cpu_relax();
+ } else {
+ sched_yield();
+ }
+
+ BAIDU_SCOPED_LOCK(m->version_lock);
+ if (TASK_STATUS_JUMPING != m->status) {
+ return m->status;
+ }
+ } while (true);
+}
+
+TaskTracer::Result TaskTracer::TraceImpl(bthread_t tid) {
+ butil::Timer timer(butil::Timer::STARTED);
+ BRPC_SCOPE_EXIT {
+ timer.stop();
+ _trace_time << timer.n_elapsed();
+ };
+
+ if (tid == bthread_self() ||
+ (NULL != pthread_fake_meta && tid == pthread_fake_meta->tid)) {
+ return Result::MakeErrorResult("Can not trace self=%d", tid);
+ }
+
+ // Make sure only one bthread is traced at a time.
+ BAIDU_SCOPED_LOCK(_trace_request_mutex);
+
+ TaskMeta* m = TaskGroup::address_meta(tid);
+ if (NULL == m) {
+ return Result::MakeErrorResult("bthread=%d never existed", tid);
+ }
+
+ BAIDU_SCOPED_LOCK(_mutex);
+ TaskStatus status;
+ pid_t worker_tid;
+ const uint32_t given_version = get_version(tid);
+ {
+ BAIDU_SCOPED_LOCK(m->version_lock);
+ if (given_version == *m->version_butex) {
+ // Start tracing.
+ m->traced = true;
+ worker_tid = m->worker_tid;
+ status = m->status;
+ } else {
+ return Result::MakeErrorResult("bthread=%d not exist now", tid);
+ }
+ }
+
+ if (TASK_STATUS_UNKNOWN == status) {
+ return Result::MakeErrorResult("bthread=%d not exist now", tid);
+ } else if (TASK_STATUS_CREATED == status) {
+ return Result::MakeErrorResult("bthread=%d has just been created", tid);
+ } else if (TASK_STATUS_FIRST_READY == status) {
+ return Result::MakeErrorResult("bthread=%d is scheduled for the first time", tid);
+ } else if (TASK_STATUS_END == status) {
+ return Result::MakeErrorResult("bthread=%d has ended", tid);
+ } else if (TASK_STATUS_JUMPING == status) {
+ // Wait for jumping completion.
+ status = WaitForJumping(m);
+ }
+
+ // After jumping, the status may be RUNNING, SUSPENDED, or READY, which is traceable.
+ Result result{};
+ if (TASK_STATUS_RUNNING == status) {
+ result = SignalTrace(worker_tid);
+ } else if (TASK_STATUS_SUSPENDED == status || TASK_STATUS_READY == status) {
+ result = ContextTrace(m->stack->context);
+ }
+
+ {
+ BAIDU_SCOPED_LOCK(m->version_lock);
+ // If m->status is TASK_STATUS_END, the bthread also waits for tracing completion,
+ // so given_version != *m->version_butex is OK.
+ m->traced = false;
+ }
+ // Wake up the waiting worker thread to jump.
+ _cond.Signal();
+
+ return result;
+}
+
+unw_cursor_t TaskTracer::MakeCursor(bthread_fcontext_t fcontext) {
+ unw_cursor_t cursor;
+ unw_init_local(&cursor, &_context);
+ auto regs = reinterpret_cast<uintptr_t*>(fcontext);
+
+ // Only need RBP, RIP, RSP on x86_64.
+ // The base pointer (RBP).
+ if (unw_set_reg(&cursor, UNW_X86_64_RBP, regs[6]) != 0) {
+ LOG(ERROR) << "Fail to set RBP";
+ }
+ // The instruction pointer (RIP).
+ if (unw_set_reg(&cursor, UNW_REG_IP, regs[7]) != 0) {
+ LOG(ERROR) << "Fail to set RIP";
+ }
+#if UNW_VERSION_MAJOR >= 1 && UNW_VERSION_MINOR >= 7
+ // The stack pointer (RSP).
+ if (unw_set_reg(&cursor, UNW_REG_SP, regs[8]) != 0) {
+ LOG(ERROR) << "Fail to set RSP";
+ }
+#endif
+
+ return cursor;
+}
+
+TaskTracer::Result TaskTracer::ContextTrace(bthread_fcontext_t fcontext) {
+ unw_cursor_t cursor = MakeCursor(fcontext);
+ return TraceCore(cursor);
+}
+
+bool TaskTracer::RegisterSignalHandler() {
+ // Set up the signal handler.
+ struct sigaction old_sa{};
+ struct sigaction sa{};
+ sa.sa_sigaction = SignalHandler;
+ sa.sa_flags = SA_SIGINFO;
+ sigfillset(&sa.sa_mask);
+ if (sigaction(SIGURG, &sa, &old_sa) != 0) {
+ PLOG(ERROR) << "Failed to sigaction";
+ return false;
+ }
+ if (NULL != old_sa.sa_handler || NULL != old_sa.sa_sigaction) {
+ LOG(ERROR) << "Signal handler of SIGURG is already registered";
+ return false;
+ }
+
+ return true;
+}
+
+// Caution: This function should be async-signal-safe.
+void TaskTracer::SignalHandler(int, siginfo_t* info, void* context) {
+ ErrnoGuard guard;
+ butil::intrusive_ptr<SignalSync> signal_sync(
+ static_cast<SignalSync*>(info->si_value.sival_ptr));
+ if (NULL == signal_sync) {
+ // The signal is not from Tracer, such as TaskControl, do nothing.
+ return;
+ }
+
+ signal_sync->context = static_cast<unw_context_t*>(context);
+ // Notify SignalTrace that SignalHandler has started.
+ // Posting the binary semaphore does not fail, so no need to check the return value.
+ // sem_post() is async-signal-safe.
+ sem_post(&signal_sync->sem);
+
+ butil::Timer timer;
+ if (FLAGS_signal_trace_timeout_ms > 0) {
+ timer.start();
+ }
+ int timeout = -1;
+ pollfd poll_fd = {signal_sync->pipe_fds[0], POLLIN, 0};
+ // Wait for tracing to complete.
+ while (true) {
+ if (FLAGS_signal_trace_timeout_ms > 0) {
+ timer.stop();
+ // At least 1ms timeout.
+ timeout = std::max(
+ (int64_t)FLAGS_signal_trace_timeout_ms - timer.m_elapsed(), (int64_t)1);
+ }
+ // poll() is async-signal-safe.
+ // Similar to self-pipe trick: https://man7.org/tlpi/code/online/dist/altio/self_pipe.c.html
+ int rc = poll(&poll_fd, 1, timeout);
+ if (-1 == rc && EINTR == errno) {
+ continue;
+ }
+ // No need to read the pipe or handle errors, just return.
+ return;
+ }
+}
+
+// Caution: This function should be async-signal-safe.
+bool TaskTracer::WaitForSignalHandler(butil::intrusive_ptr<SignalSync> signal_sync,
+                                      const timespec* abs_timeout, Result& result) {
+ // It is safe to sem_timedwait() here and sem_post() in SignalHandler.
+ while (sem_timedwait(&signal_sync->sem, abs_timeout) != 0) {
+ if (EINTR == errno) {
+ continue;
+ }
+ if (ETIMEDOUT == errno) {
+ result.SetError("Timeout exceeded %dms", FLAGS_signal_trace_timeout_ms);
+ } else {
+ // While the signal handler is running,
+ // we can not use berror(), which is not async-signal-safe.
+ result.SetError("Fail to sem_timedwait, errno=%d", errno);
+ }
+ return false;
+ }
+ return true;
+}
+
+// Caution: This function should be async-signal-safe.
+void TaskTracer::WakeupSignalHandler(butil::intrusive_ptr<SignalSync> signal_sync, Result& result) {
+ while (true) {
+ ssize_t nw = write(signal_sync->pipe_fds[1], "1", 1);
+ if (0 < nw) {
+ break;
+ } else if (-1 == nw && EINTR == errno) {
+ // Only EINTR is allowed. Even EAGAIN should not be returned.
+ continue;
+ }
+ // While the signal handler is running,
+ // we can not use berror(), which is not async-signal-safe.
+ result.SetError("Fail to write pipe to notify signal handler, errno=%d", errno);
+ }
+}
+
+TaskTracer::Result TaskTracer::SignalTrace(pid_t tid) {
+ // CAUTION:
+ // The signal handler will wait for the backtrace to complete.
+ // If the worker thread is interrupted when holding a resource(lock, etc),
+ // and this function waits for the resource during capturing backtraces,
+ // it may cause a deadlock.
+ //
+ // https://github.com/gperftools/gperftools/wiki/gperftools'-stacktrace-capturing-methods-and-their-issues#libunwind
+ // Generally, libunwind promises async-signal-safety for capturing backtraces.
+ // But in practice, it is only partially async-signal-safe due to reliance on the
+ // dl_iterate_phdr API, which is used to enumerate all loaded ELF modules
+ // (.so files and main executable binary). No libc offers a dl_iterate_phdr that
+ // is async-signal-safe. In practice, the issue may happen if we take a tracing
+ // signal during an existing dl_iterate_phdr call (such as when the program
+ // throws an exception) or during dlopen/dlclose-ing some .so module.
+ // Deadlock call stack:
+ // #0 __lll_lock_wait (futex=futex@entry=0x7f0d3d7f0990 <_rtld_global+2352>, private=0) at lowlevellock.c:52
+ // #1 0x00007f0d3a73c131 in __GI___pthread_mutex_lock (mutex=0x7f0d3d7f0990 <_rtld_global+2352>) at ../nptl/pthread_mutex_lock.c:115
+ // #2 0x00007f0d38eb0231 in __GI___dl_iterate_phdr (callback=callback@entry=0x7f0d38c456a0 <_ULx86_64_dwarf_callback>, data=data@entry=0x7f0d07defad0) at dl-iteratephdr.c:40
+ // #3 0x00007f0d38c45d79 in _ULx86_64_dwarf_find_proc_info (as=0x7f0d38c4f340 <local_addr_space>, ip=ip@entry=139694791966897, pi=pi@entry=0x7f0d07df0498, need_unwind_info=need_unwind_info@entry=1, arg=0x7f0d07df0340) at dwarf/Gfind_proc_info-lsb.c:759
+ // #4 0x00007f0d38c43260 in fetch_proc_info (c=c@entry=0x7f0d07df0340, ip=139694791966897) at dwarf/Gparser.c:461
+ // #5 0x00007f0d38c44e46 in find_reg_state (sr=0x7f0d07defd10, c=0x7f0d07df0340) at dwarf/Gparser.c:925
+ // #6 _ULx86_64_dwarf_step (c=c@entry=0x7f0d07df0340) at dwarf/Gparser.c:972
+ // #7 0x00007f0d38c40c14 in _ULx86_64_step (cursor=cursor@entry=0x7f0d07df0340) at x86_64/Gstep.c:71
+ // #8 0x00007f0d399ed8f6 in GetStackTraceWithContext_libunwind (result=<optimized out>, max_depth=63, skip_count=132054887, ucp=<optimized out>) at src/stacktrace_libunwind-inl.h:138
+ // #9 0x00007f0d399ee083 in GetStackTraceWithContext (result=0x7f0d07df07b8, max_depth=63, skip_count=3, uc=0x7f0d07df0a40) at src/stacktrace.cc:305
+ // #10 0x00007f0d399ea992 in CpuProfiler::prof_handler (signal_ucontext=<optimized out>, cpu_profiler=0x7f0d399f6600, sig=<optimized out>) at src/profiler.cc:359
+ // #11 0x00007f0d399eb633 in ProfileHandler::SignalHandler (sig=27, sinfo=0x7f0d07df0b70, ucontext=0x7f0d07df0a40) at src/profile-handler.cc:530
+ // #12 <signal handler called>
+ // #13 0x00007f0d3a73c0b1 in __GI___pthread_mutex_lock (mutex=0x7f0d3d7f0990 <_rtld_global+2352>) at ../nptl/pthread_mutex_lock.c:115
+ // #14 0x00007f0d38eb0231 in __GI___dl_iterate_phdr (callback=0x7f0d38f525f0, data=0x7f0d07df10c0) at dl-iteratephdr.c:40
+ // #15 0x00007f0d38f536c1 in _Unwind_Find_FDE () from /lib/x86_64-linux-gnu/libgcc_s.so.1
+ // #16 0x00007f0d38f4f868 in ?? () from /lib/x86_64-linux-gnu/libgcc_s.so.1
+ // #17 0x00007f0d38f50a20 in ?? () from /lib/x86_64-linux-gnu/libgcc_s.so.1
+ // #18 0x00007f0d38f50f99 in _Unwind_RaiseException () from /lib/x86_64-linux-gnu/libgcc_s.so.1
+ // #19 0x00007f0d390088dc in __cxa_throw () from /lib/x86_64-linux-gnu/libstdc++.so.6
+ // #20 0x00007f0d3b5b2245 in __cxxabiv1::__cxa_throw (thrownException=0x7f0d114ea8c0, type=0x7f0d3d6dd830 <typeinfo for rockset::GRPCError>, destructor=<optimized out>) at /src/folly/folly/experimental/exception_tracer/ExceptionTracerLib.cpp:107
+ //
+ // Therefore, we do not capture backtraces in the signal handler, to avoid mutex
+ // reentry and deadlock. Instead, we capture backtraces in this function and
+ // end the signal handler after the capture is complete.
+ // Even so, there is still a potential deadlock: the worker thread may be
+ // interrupted during an existing dl_iterate_phdr call and then wait for the
+ // capture to complete, while this function captures backtraces with
+ // dl_iterate_phdr. We introduce a timeout mechanism in the signal
+ // handler to avoid this deadlock.
+
+ // Each signal trace has an independent SignalSync to
+ // prevent the previous SignalHandler from affecting the new SignalTrace.
+ butil::intrusive_ptr<SignalSync> signal_sync(new SignalSync());
+ if (!signal_sync->Init()) {
+ return Result::MakeErrorResult("Fail to init SignalSync");
+ }
+ // Add reference for SignalHandler.
+ signal_sync->AddRefManually();
+
+ union sigval value{};
+ value.sival_ptr = signal_sync.get();
+ size_t sigqueue_try = 0;
+ while (sigqueue(tid, SIGURG, value) != 0) {
+ if (errno != EAGAIN || sigqueue_try++ >= 3) {
+ return Result::MakeErrorResult("Fail to sigqueue: %s", berror());
+ }
+ }
+
+ // Caution: Start here, need to ensure async-signal-safety.
+ Result result;
+ // Wakeup the signal handler at the end.
+ BRPC_SCOPE_EXIT {
+ WakeupSignalHandler(signal_sync, result);
+ };
+
+ timespec abs_timeout{};
+ timespec* abs_timeout_ptr = NULL;
+ if (FLAGS_signal_trace_timeout_ms > 0) {
+ abs_timeout = butil::milliseconds_from_now(FLAGS_signal_trace_timeout_ms);
+ abs_timeout_ptr = &abs_timeout;
+ }
+ // Wait for the signal handler to start.
+ if (!WaitForSignalHandler(signal_sync, abs_timeout_ptr, result)) {
+ return result;
+ }
+
+ if (NULL == signal_sync->context) {
+ result.SetError("context is NULL");
+ return result;
+ }
+ unw_cursor_t cursor;
+ int rc = unw_init_local(&cursor, signal_sync->context);
+ if (0 != rc) {
+ result.SetError("Failed to init local, rc=%d", rc);
+ return result;
+ }
+
+ return TraceCore(cursor);
+}
+
+TaskTracer::Result TaskTracer::TraceCore(unw_cursor_t& cursor) {
+ Result result{};
+ result.fast_unwind = FLAGS_enable_fast_unwind;
+ for (result.frame_count = 0; result.frame_count < arraysize(result.ips); ++result.frame_count) {
+ int rc = unw_step(&cursor);
+ if (0 == rc) {
+ break;
+ } else if (rc < 0) {
+ return Result::MakeErrorResult("Fail to unw_step, rc=%d", rc);
+ }
+
+ unw_word_t ip = 0;
+ // Fast unwind does not care about the return value.
+ rc = unw_get_reg(&cursor, UNW_REG_IP, &ip);
+ result.ips[result.frame_count] = ip;
+
+ if (result.fast_unwind) {
+ continue;
+ }
+
+ if (0 != rc) {
+ result.mangled[result.frame_count][0] = '\0';
+ continue;
+ }
+
+ // Slow path.
+ rc = unw_get_proc_name(&cursor, result.mangled[result.frame_count],
+ sizeof(result.mangled[result.frame_count]), NULL);
+ // UNW_ENOMEM is OK.
+ if (0 != rc && UNW_ENOMEM != rc) {
+ result.mangled[result.frame_count][0] = '\0';
+ }
+ }
+
+ return result;
+}
+
+} // namespace bthread
+
+#endif // BRPC_BTHREAD_TRACER
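
For reference, TraceCore() above walks frames with libunwind's local unwinding API (unw_step, unw_get_reg, unw_get_proc_name). A self-contained sketch of the same loop outside brpc, assuming libunwind is installed and the program is linked with -lunwind:

// Minimal local-unwind sketch mirroring TaskTracer::TraceCore.
// Build (assumption): g++ unwind_demo.cpp -lunwind
#define UNW_LOCAL_ONLY
#include <libunwind.h>
#include <cstdio>

static void print_backtrace() {
    unw_context_t context;
    unw_cursor_t cursor;
    if (unw_getcontext(&context) != 0 || unw_init_local(&cursor, &context) != 0) {
        return;
    }
    char name[256];
    for (int i = 0; i < 64 && unw_step(&cursor) > 0; ++i) {
        unw_word_t ip = 0;
        unw_word_t offset = 0;
        unw_get_reg(&cursor, UNW_REG_IP, &ip);
        if (unw_get_proc_name(&cursor, name, sizeof(name), &offset) != 0) {
            name[0] = '\0';
        }
        std::printf("#%d 0x%016lx %s+0x%lx\n", i, (unsigned long)ip,
                    name[0] != '\0' ? name : "<unknown>", (unsigned long)offset);
    }
}

int main() {
    print_backtrace();
    return 0;
}
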
diff --git a/src/bthread/task_tracer.h b/src/bthread/task_tracer.h
new file mode 100644
index 00000000..8c84f7b9
--- /dev/null
+++ b/src/bthread/task_tracer.h
@@ -0,0 +1,143 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef BTHREAD_TASK_TRACER_H
+#define BTHREAD_TASK_TRACER_H
+
+#ifdef BRPC_BTHREAD_TRACER
+
+#include <signal.h>
+#include <semaphore.h>
+#include <vector>
+#include <algorithm>
+#include <libunwind.h>
+#include "butil/strings/safe_sprintf.h"
+#include "butil/synchronization/condition_variable.h"
+#include "butil/shared_object.h"
+#include "butil/fd_utility.h"
+#include "bthread/task_meta.h"
+#include "bthread/mutex.h"
+
+namespace bthread {
+
+// Tracer for bthread.
+class TaskTracer {
+public:
+ // Returns true on success, false otherwise.
+ bool Init();
+ // Set the status to `s'.
+ void set_status(TaskStatus s, TaskMeta* meta);
+ static void set_running_status(pid_t worker_tid, TaskMeta* meta);
+ static bool set_end_status_unsafe(TaskMeta* m);
+
+ // Trace the bthread of `tid'.
+ std::string Trace(bthread_t tid);
+ void Trace(std::ostream& os, bthread_t tid);
+
+ // When the worker is jumping stack from one bthread to another, wait until tracing of the bthread completes.
+ void WaitForTracing(TaskMeta* m);
+
+private:
+ // Error number guard used in signal handler.
+ class ErrnoGuard {
+ public:
+ ErrnoGuard() : _errno(errno) {}
+ ~ErrnoGuard() { errno = _errno; }
+ private:
+ int _errno;
+ };
+
+ struct Result {
+ template<typename... Args>
+ static Result MakeErrorResult(const char* fmt, Args... args) {
+ Result result{};
+ result.SetError(fmt, std::forward<Args>(args)...);
+ return result;
+ }
+
+ template<typename... Args>
+ void SetError(const char* fmt, Args... args) {
+ err_count = std::min(err_count + 1, MAX_ERROR_NUM);
+ butil::strings::SafeSPrintf(err_msg[err_count - 1], fmt, args...);
+ }
+
+ std::string OutputToString();
+ void OutputToStream(std::ostream& os);
+
+ bool OK() const { return err_count == 0; }
+
+ static const size_t MAX_TRACE_NUM = 64;
+ static const size_t MAX_ERROR_NUM = 2;
+
+ unw_word_t ips[MAX_TRACE_NUM];
+ char mangled[MAX_TRACE_NUM][256]{};
+ size_t frame_count{0};
+ char err_msg[MAX_ERROR_NUM][64]{};
+ size_t err_count{0};
+
+ bool fast_unwind{false};
+ };
+
+ // For signal trace.
+ struct SignalSync : public butil::SharedObject {
+ ~SignalSync() override;
+ bool Init();
+
+ unw_context_t* context{NULL};
+ sem_t sem{};
+ int pipe_fds[2]{};
+
+ private:
+ bool _pipe_init{false};
+ bool _sem_init{false};
+ };
+
+ static TaskStatus WaitForJumping(TaskMeta* m);
+ Result TraceImpl(bthread_t tid);
+
+ unw_cursor_t MakeCursor(bthread_fcontext_t fcontext);
+ Result ContextTrace(bthread_fcontext_t fcontext);
+
+ static bool RegisterSignalHandler();
+ static void SignalHandler(int sig, siginfo_t* info, void* context);
+ static bool WaitForSignalHandler(butil::intrusive_ptr<SignalSync> signal_sync,
+                                  const timespec* abs_timeout, Result& result);
+ static void WakeupSignalHandler(
+ butil::intrusive_ptr<SignalSync> signal_sync, Result& result);
+ Result SignalTrace(pid_t worker_tid);
+
+ static Result TraceCore(unw_cursor_t& cursor);
+
+ // Make sure only one bthread is traced at a time.
+ bthread::Mutex _trace_request_mutex;
+
+ // For signal trace.
+ // Make sure bthread does not jump stack when it is being traced.
+ butil::Mutex _mutex;
+ butil::ConditionVariable _cond{&_mutex};
+
+ // For context trace.
+ unw_context_t _context{};
+
+ bvar::LatencyRecorder _trace_time{"bthread_trace_time"};
+};
+
+} // namespace bthread
+
+#endif // BRPC_BTHREAD_TRACER
+
+#endif // BRPC_BTHREAD_TRACER_H
diff --git a/src/butil/debug/stack_trace.cc b/src/butil/debug/stack_trace.cc
index 38abede9..97a4cd76 100644
--- a/src/butil/debug/stack_trace.cc
+++ b/src/butil/debug/stack_trace.cc
@@ -29,17 +29,18 @@ const void *const *StackTrace::Addresses(size_t* count) const {
}
size_t StackTrace::CopyAddressTo(void** buffer, size_t max_nframes) const {
- size_t nframes = std::min(count_, max_nframes);
- memcpy(buffer, trace_, nframes * sizeof(void*));
- return nframes;
+ size_t nframes = std::min(count_, max_nframes);
+ memcpy(buffer, trace_, nframes * sizeof(void*));
+ return nframes;
}
std::string StackTrace::ToString() const {
- std::stringstream stream;
+ std::string str;
+ str.reserve(1024);
#if !defined(__UCLIBC__)
- OutputToStream(&stream);
+ OutputToString(str);
#endif
- return stream.str();
+ return str;
}
} // namespace debug
diff --git a/src/butil/debug/stack_trace.h b/src/butil/debug/stack_trace.h
index 5c6545ad..e8120583 100644
--- a/src/butil/debug/stack_trace.h
+++ b/src/butil/debug/stack_trace.h
@@ -79,6 +79,7 @@ class BUTIL_EXPORT StackTrace {
#if !defined(__UCLIBC__)
// Resolves backtrace to symbols and write to stream.
void OutputToStream(std::ostream* os) const;
+ void OutputToString(std::string& str) const;
#endif
// Resolves backtrace to symbols and returns as string.
diff --git a/src/butil/debug/stack_trace_posix.cc b/src/butil/debug/stack_trace_posix.cc
index 878f94a7..9ef91c2f 100644
--- a/src/butil/debug/stack_trace_posix.cc
+++ b/src/butil/debug/stack_trace_posix.cc
@@ -429,6 +429,23 @@ class StreamBacktraceOutputHandler : public BacktraceOutputHandler {
DISALLOW_COPY_AND_ASSIGN(StreamBacktraceOutputHandler);
};
+class StringBacktraceOutputHandler : public BacktraceOutputHandler {
+public:
+ explicit StringBacktraceOutputHandler(std::string& str) : _str(str) {}
+
+ DISALLOW_COPY_AND_ASSIGN(StringBacktraceOutputHandler);
+
+ void HandleOutput(const char* output) OVERRIDE {
+ if (NULL == output) {
+ return;
+ }
+ _str.append(output);
+ }
+
+private:
+ std::string& _str;
+};
+
void WarmUpBacktrace() {
// Warm up stack trace infrastructure. It turns out that on the first
// call glibc initializes some internal data structures using pthread_once,
@@ -801,6 +818,11 @@ void StackTrace::OutputToStream(std::ostream* os) const {
StreamBacktraceOutputHandler handler(os);
ProcessBacktrace(trace_, count_, &handler);
}
+
+void StackTrace::OutputToString(std::string& str) const {
+ StringBacktraceOutputHandler handler(str);
+ ProcessBacktrace(trace_, count_, &handler);
+}
#endif
namespace internal {
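
A short usage sketch of the new OutputToString() overload, assuming (as the existing ToString() already does) that a default-constructed butil::debug::StackTrace captures the current thread's stack:

#include <string>
#include "butil/debug/stack_trace.h"
#include "butil/logging.h"

void log_current_stack() {
    // Resolve the current stack into a std::string without going through
    // std::stringstream, which is what the new overload is for.
    butil::debug::StackTrace trace;
    std::string str;
    str.reserve(1024);
    trace.OutputToString(str);
    LOG(INFO) << "stack:\n" << str;
}
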
diff --git a/src/butil/memory/scope_guard.h b/src/butil/memory/scope_guard.h
index 837acbbc..7d72a560 100644
--- a/src/butil/memory/scope_guard.h
+++ b/src/butil/memory/scope_guard.h
@@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.
-#ifndef BRPC_SCOPED_GUARD_H
-#define BRPC_SCOPED_GUARD_H
+#ifndef BUTIL_SCOPED_GUARD_H
+#define BUTIL_SCOPED_GUARD_H
#include "butil/type_traits.h"
#include "butil/macros.h"
@@ -104,4 +104,4 @@ operator+(ScopeExitHelper, Callback&& callback) {
auto BRPC_ANONYMOUS_VARIABLE(SCOPE_EXIT) = \
::butil::internal::ScopeExitHelper() + [&]() noexcept
-#endif // BRPC_SCOPED_GUARD_H
+#endif // BUTIL_SCOPED_GUARD_H
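
The BRPC_SCOPE_EXIT macro kept by this header is how task_tracer.cpp records bthread_trace_time on every return path of TraceImpl(). A small sketch of the same pattern; the bvar name "example_latency" is illustrative:

#include "butil/memory/scope_guard.h"
#include "butil/time.h"
#include "bvar/latency_recorder.h"

static bvar::LatencyRecorder g_example_latency("example_latency");

void timed_work() {
    butil::Timer timer(butil::Timer::STARTED);
    // Runs when the scope exits, regardless of which return path is taken.
    BRPC_SCOPE_EXIT {
        timer.stop();
        g_example_latency << timer.n_elapsed();
    };
    // ... the actual work goes here ...
}
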
diff --git a/src/butil/reloadable_flags.h b/src/butil/reloadable_flags.h
new file mode 100644
index 00000000..2cb11e92
--- /dev/null
+++ b/src/butil/reloadable_flags.h
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+#ifndef BUTIL_RELOADABLE_FLAGS_H
+#define BUTIL_RELOADABLE_FLAGS_H
+
+#include <stdint.h>
+#include <unistd.h> // write, _exit
+#include <gflags/gflags.h>
+#include "butil/macros.h"
+#include "butil/type_traits.h"
+
+// Register an always-true validator to a gflag so that the gflag is treated as
+// reloadable by brpc. If a validator already exists, abort the program.
+// You should call this macro within global scope, for example:
+//
+// DEFINE_int32(foo, 0, "blah blah");
+// BUTIL_VALIDATE_GFLAG(foo, butil::PassValidate);
+//
+// This macro does not work for string-flags because they're thread-unsafe to
+// modify directly. To emphasize this, you have to write the validator by
+// yourself and use GFLAGS_NS::GetCommandLineOption() to access the flag.
+#define BUTIL_VALIDATE_GFLAG(flag, validate_fn) \
+ namespace butil_flags {} \
+ const int register_FLAGS_ ## flag ## _dummy \
+ __attribute__((__unused__)) = \
+ ::butil::RegisterFlagValidatorOrDieImpl< \
+ decltype(FLAGS_##flag)>(&FLAGS_##flag, (validate_fn))
+
+
+namespace butil {
+
+template <typename T>
+bool PassValidate(const char*, T) {
+ return true;
+}
+
+template <typename T>
+bool PositiveInteger(const char*, T v) {
+ return v > 0;
+}
+
+template <typename T>
+bool RegisterFlagValidatorOrDieImpl(
+ const T* flag, bool (*validate_fn)(const char*, T val)) {
+ static_assert(!butil::is_same<std::string, T>::value,
+ "Not support string flags");
+ if (GFLAGS_NS::RegisterFlagValidator(flag, validate_fn)) {
+ return true;
+ }
+ // Error printed by gflags does not have newline. Add one to it.
+ char newline = '\n';
+ butil::ignore_result(write(2, &newline, 1));
+ _exit(1);
+}
+
+} // namespace butil
+
+
+#endif // BUTIL_RELOADABLE_FLAGS_H
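
Usage mirrors task_tracer.cpp above: define a flag, then mark it reloadable with a validator at global scope. The flag name example_timeout_ms is illustrative:

#include <gflags/gflags.h>
#include "butil/reloadable_flags.h"

// Same pattern as signal_trace_timeout_ms in src/bthread/task_tracer.cpp.
DEFINE_uint32(example_timeout_ms, 50, "Timeout in milliseconds");
BUTIL_VALIDATE_GFLAG(example_timeout_ms, butil::PositiveInteger<uint32_t>);
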
diff --git a/src/brpc/shared_object.h b/src/butil/shared_object.h
similarity index 94%
copy from src/brpc/shared_object.h
copy to src/butil/shared_object.h
index d8ff9aae..abcfd46c 100644
--- a/src/brpc/shared_object.h
+++ b/src/butil/shared_object.h
@@ -16,14 +16,14 @@
// under the License.
-#ifndef BRPC_SHARED_OBJECT_H
-#define BRPC_SHARED_OBJECT_H
+#ifndef BUTIL_SHARED_OBJECT_H
+#define BUTIL_SHARED_OBJECT_H
#include "butil/intrusive_ptr.hpp" // butil::intrusive_ptr
#include "butil/atomicops.h"
-namespace brpc {
+namespace butil {
// Inherit this class to be intrusively shared. Comparing to shared_ptr,
// intrusive_ptr saves one malloc (for shared_count) and gets better cache
@@ -67,7 +67,7 @@ inline void intrusive_ptr_release(SharedObject* obj) {
obj->RemoveRefManually();
}
-} // namespace brpc
+} // namespace butil
-#endif // BRPC_SHARED_OBJECT_H
+#endif // BUTIL_SHARED_OBJECT_H
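
TaskTracer::SignalSync inherits this class so that an extra reference can be handed to the signal handler through butil::intrusive_ptr. A minimal sketch of the same ownership pattern; the type name MyPayload is illustrative:

#include "butil/intrusive_ptr.hpp"
#include "butil/shared_object.h"

// An intrusively ref-counted type in the style of TaskTracer::SignalSync.
struct MyPayload : public butil::SharedObject {
    int value{0};
};

void use_payload() {
    butil::intrusive_ptr<MyPayload> p(new MyPayload);  // refcount becomes 1
    p->AddRefManually();      // hand an extra reference to another owner
    // ... the other owner eventually calls:
    p->RemoveRefManually();
    // p releases the last reference when it goes out of scope.
}
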
diff --git a/src/butil/time.h b/src/butil/time.h
index 90378c59..8b856998 100644
--- a/src/butil/time.h
+++ b/src/butil/time.h
@@ -384,7 +384,7 @@ public:
};
Timer() : _stop(0), _start(0) {}
- explicit Timer(const TimerType) {
+ explicit Timer(const TimerType) : Timer() {
start();
}
diff --git a/test/brpc_builtin_service_unittest.cpp b/test/brpc_builtin_service_unittest.cpp
index 178c3f9e..8ea17a47 100644
--- a/test/brpc_builtin_service_unittest.cpp
+++ b/test/brpc_builtin_service_unittest.cpp
@@ -837,6 +837,17 @@ void* dummy_bthread(void*) {
return NULL;
}
+
+#ifdef BRPC_BTHREAD_TRACER
+bool g_bthread_trace_stop = false;
+void* bthread_trace(void*) {
+ while (!g_bthread_trace_stop) {
+ bthread_usleep(1000 * 100);
+ }
+ return NULL;
+}
+#endif // BRPC_BTHREAD_TRACER
+
TEST_F(BuiltinServiceTest, bthreads) {
brpc::BthreadsService service;
brpc::BthreadsRequest req;
@@ -867,7 +878,25 @@ TEST_F(BuiltinServiceTest, bthreads) {
service.default_method(&cntl, &req, &res, &done);
EXPECT_FALSE(cntl.Failed());
CheckContent(cntl, "stop=0");
- }
+ }
+
+#ifdef BRPC_BTHREAD_TRACER
+ {
+ bthread_t th;
+ EXPECT_EQ(0, bthread_start_background(&th, NULL, bthread_trace, NULL));
+ ClosureChecker done;
+ brpc::Controller cntl;
+ std::string id_string;
+ butil::string_printf(&id_string, "%llu?st=1", (unsigned long long)th);
+ cntl.http_request().uri().SetHttpURL("/bthreads/" + id_string);
+ cntl.http_request()._unresolved_path = id_string;
+ service.default_method(&cntl, &req, &res, &done);
+ g_bthread_trace_stop = true;
+ EXPECT_FALSE(cntl.Failed());
+ CheckContent(cntl, "stop=0");
+ CheckContent(cntl, "bthread_trace");
+ }
+#endif // BRPC_BTHREAD_TRACER
}
TEST_F(BuiltinServiceTest, sockets) {
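
The st=1 query exercised by the bthreads test above is also reachable from outside the process through the builtin /bthreads service. A hedged sketch of fetching a bthread's stack over HTTP with a brpc Channel; the address 127.0.0.1:8000 and the function name are placeholders:

#include <string>
#include <brpc/channel.h>
#include <brpc/controller.h>
#include <butil/logging.h>

int dump_bthread_stack(uint64_t tid) {
    brpc::Channel channel;
    brpc::ChannelOptions options;
    options.protocol = brpc::PROTOCOL_HTTP;
    if (channel.Init("127.0.0.1:8000", &options) != 0) {
        LOG(ERROR) << "Fail to initialize channel";
        return -1;
    }
    brpc::Controller cntl;
    cntl.http_request().uri() = "/bthreads/" + std::to_string(tid) + "?st=1";
    channel.CallMethod(NULL, &cntl, NULL, NULL, NULL);  // synchronous HTTP GET
    if (cntl.Failed()) {
        LOG(ERROR) << cntl.ErrorText();
        return -1;
    }
    LOG(INFO) << cntl.response_attachment();
    return 0;
}
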
diff --git a/test/bthread_unittest.cpp b/test/bthread_unittest.cpp
index 5ed8aba8..d1605234 100644
--- a/test/bthread_unittest.cpp
+++ b/test/bthread_unittest.cpp
@@ -20,14 +20,24 @@
#include "butil/time.h"
#include "butil/macros.h"
#include "butil/logging.h"
-#include "butil/logging.h"
#include "butil/gperftools_profiler.h"
#include "bthread/bthread.h"
#include "bthread/unstable.h"
#include "bthread/task_meta.h"
+int main(int argc, char* argv[]) {
+ testing::InitGoogleTest(&argc, argv);
+ GFLAGS_NS::ParseCommandLineFlags(&argc, &argv, true);
+ int rc = RUN_ALL_TESTS();
+ return rc;
+}
+
namespace bthread {
- extern __thread bthread::LocalStorage tls_bls;
+extern __thread bthread::LocalStorage tls_bls;
+DECLARE_bool(enable_fast_unwind);
+#ifdef BRPC_BTHREAD_TRACER
+extern std::string stack_trace(bthread_t tid);
+#endif // BRPC_BTHREAD_TRACER
}
namespace {
@@ -608,4 +618,44 @@ TEST_F(BthreadTest, yield_single_thread) {
ASSERT_EQ(0, bthread_join(tid, NULL));
}
+#ifdef BRPC_BTHREAD_TRACER
+TEST_F(BthreadTest, trace) {
+ stop = false;
+ bthread_t th;
+ ASSERT_EQ(0, bthread_start_urgent(&th, NULL, spin_and_log, (void*)1));
+ usleep(100 * 1000);
+ bthread::FLAGS_enable_fast_unwind = false;
+ std::string st = bthread::stack_trace(th);
+ LOG(INFO) << "fast_unwind spin_and_log stack trace:\n" << st;
+ ASSERT_NE(std::string::npos, st.find("spin_and_log"));
+
+ bthread::FLAGS_enable_fast_unwind = true;
+ st = bthread::stack_trace(th);
+ LOG(INFO) << "spin_and_log stack trace:\n" << st;
+ ASSERT_NE(std::string::npos, st.find("spin_and_log"));
+ stop = true;
+ ASSERT_EQ(0, bthread_join(th, NULL));
+
+ stop = false;
+ ASSERT_EQ(0, bthread_start_urgent(&th, NULL, repeated_sleep, (void*)1));
+ usleep(100 * 1000);
+ bthread::FLAGS_enable_fast_unwind = false;
+ st = bthread::stack_trace(th);
+ LOG(INFO) << "fast_unwind repeated_sleep stack trace:\n" << st;
+ ASSERT_NE(std::string::npos, st.find("repeated_sleep"));
+
+ bthread::FLAGS_enable_fast_unwind = true;
+ st = bthread::stack_trace(th);
+ LOG(INFO) << "repeated_sleep stack trace:\n" << st;
+ ASSERT_NE(std::string::npos, st.find("repeated_sleep"));
+ stop = true;
+ ASSERT_EQ(0, bthread_join(th, NULL));
+
+ st = bthread::stack_trace(th);
+ LOG(INFO) << "ended bthread stack trace:\n" << st;
+ ASSERT_NE(std::string::npos, st.find("not exist now"));
+
+}
+#endif // BRPC_BTHREAD_TRACER
+
} // namespace
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]