This is an automated email from the ASF dual-hosted git repository.
gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d177d5b0048 [chore](cloud) Support starting both meta-service and
recycler within single process (#40223)
d177d5b0048 is described below
commit d177d5b004876c11a06fa2975794286436ec9e22
Author: Gavin Chou <[email protected]>
AuthorDate: Wed Sep 4 21:27:17 2024 +0800
[chore](cloud) Support starting both meta-service and recycler within
single process (#40223)
For example, the following command starts both meta-service and recycler
within a single process:
```
./bin/start.sh --daemon
```
The log file will be meta_service.INFO*,
and this has the same effect as `./bin/start.sh --meta-service --recycler
--daemon`.
doc PR https://github.com/apache/doris-website/pull/1073
---
cloud/script/start.sh | 27 ++++++++++-----
cloud/src/common/config.h | 1 +
cloud/src/main.cpp | 60 +++++++++++++++++++--------------
cloud/src/recycler/checker.cpp | 2 ++
cloud/src/recycler/recycler.cpp | 5 +++
cloud/src/recycler/recycler_service.cpp | 2 +-
cloud/test/recycler_test.cpp | 1 +
7 files changed, 63 insertions(+), 35 deletions(-)
diff --git a/cloud/script/start.sh b/cloud/script/start.sh
index 28e986166ae..582c80c2e6f 100644
--- a/cloud/script/start.sh
+++ b/cloud/script/start.sh
@@ -122,7 +122,10 @@ fi
echo "LIBHDFS3_CONF=${LIBHDFS3_CONF}"
-export
JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1"
+# to enable dump jeprof heap stats prodigally, change `prof:false` to
`prof:true`
+# to control the dump interval change `lg_prof_interval` to a specific value,
it is pow/exponent of 2 in size of bytes, default 34 means 2 ** 34 = 16GB
+# to control the dump path, change `prof_prefix` to a specific path, e.g.
/doris_cloud/log/ms_, by default it dumps at the path where the start command
called
+export
JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof_prefix:ms_,prof:false,lg_prof_interval:34"
if [[ "${RUN_VERSION}" -eq 1 ]]; then
"${bin}" --version
@@ -131,14 +134,22 @@ fi
mkdir -p "${DORIS_HOME}/log"
echo "starts ${process} with args: $*"
+out_file=${DORIS_HOME}/log/${process}.out
if [[ "${RUN_DAEMON}" -eq 1 ]]; then
- date >>"${DORIS_HOME}/log/${process}.out"
- nohup "${bin}" "$@" >>"${DORIS_HOME}/log/${process}.out" 2>&1 &
- # wait for log flush
- sleep 1.5
- tail -n10 "${DORIS_HOME}/log/${process}.out" | grep 'working directory'
-B1 -A10
- echo "please check process log for more details"
- echo ""
+ # append 10 blank lines to ensure the following tail -n10 works correctly
+ printf "\n\n\n\n\n\n\n\n\n\n" >>"${out_file}"
+ echo "$(date +'%F %T') try to start ${process}" >>"${out_file}"
+ nohup "${bin}" "$@" >>"${out_file}" 2>&1 &
+ echo "wait and check ${process} start successfully"
+ sleep 3
+ tail -n10 "${out_file}" | grep 'successfully started brpc'
+ ret=$?
+ if [[ ${ret} -ne 0 ]]; then
+ echo "${process} may not start successfully please check process log
for more details"
+ exit 1
+ fi
+ echo "${process} start successfully"
+ exit 0
elif [[ "${RUN_CONSOLE}" -eq 1 ]]; then
export DORIS_LOG_TO_STDERR=1
date
diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h
index e31a60a0d69..b1db41a6eb7 100644
--- a/cloud/src/common/config.h
+++ b/cloud/src/common/config.h
@@ -77,6 +77,7 @@ CONF_mInt32(scan_instances_interval_seconds, "60"); // 1min
CONF_mInt32(check_object_interval_seconds, "43200"); // 12hours
CONF_mInt64(check_recycle_task_interval_seconds, "600"); // 10min
+CONF_mInt64(recycler_sleep_before_scheduling_seconds, "60");
// log a warning if a recycle task takes longer than this duration
CONF_mInt64(recycle_task_threshold_seconds, "10800"); // 3h
diff --git a/cloud/src/main.cpp b/cloud/src/main.cpp
index 9356a3546d0..74e6a8daaf1 100644
--- a/cloud/src/main.cpp
+++ b/cloud/src/main.cpp
@@ -161,13 +161,13 @@ DECLARE_int64(socket_max_unwritten_bytes);
int main(int argc, char** argv) {
if (argc > 1) {
if (auto ret = args.parse(argc - 1, argv + 1); !ret.empty()) {
- std::cerr << ret << std::endl;
+ std::cerr << "parse arguments error: " << ret << std::endl;
help();
return -1;
}
}
- if (argc < 2 || args.get<bool>(ARG_HELP)) {
+ if (args.get<bool>(ARG_HELP)) {
help();
return 0;
}
@@ -177,21 +177,16 @@ int main(int argc, char** argv) {
return 0;
}
- // FIXME(gavin): do we need to enable running both MS and recycler within
- // single process
- if (!(args.get<bool>(ARG_META_SERVICE) ^ args.get<bool>(ARG_RECYCLER))) {
- std::cerr << "only one of --meta-service and --recycler must be
specified" << std::endl;
- return 1;
- }
-
- // There may be more roles to play
+ // There may be more roles to play in the future, if there are multi roles
specified,
+ // use meta_service as the process name
std::string process_name = args.get<bool>(ARG_META_SERVICE) ?
"meta_service"
: args.get<bool>(ARG_RECYCLER) ? "recycler"
- : "";
- if (process_name.empty()) {
- std::cerr << "failed to determine prcess name with given args" <<
std::endl;
- return 1;
- }
+ :
"meta_service";
+
+ using namespace std::chrono;
+
+ auto start = steady_clock::now();
+ auto end = start;
auto pid_file_fd_holder = gen_pidfile("doris_cloud");
if (pid_file_fd_holder == nullptr) {
@@ -215,11 +210,19 @@ int main(int argc, char** argv) {
}
// We can invoke glog from now on
-
std::string msg;
+ LOG(INFO) << "try to start doris_cloud";
LOG(INFO) << build_info();
std::cout << build_info() << std::endl;
+ if (!args.get<bool>(ARG_META_SERVICE) && !args.get<bool>(ARG_RECYCLER)) {
+ std::get<0>(args.args()[ARG_META_SERVICE]) = true;
+ std::get<0>(args.args()[ARG_RECYCLER]) = true;
+ LOG(INFO) << "meta_service and recycler are both not specified, "
+ "run doris_cloud as meta_service and recycler by default";
+ std::cout << "run doris_cloud as meta_service and recycler by default"
<< std::endl;
+ }
+
brpc::Server server;
brpc::FLAGS_max_body_size = config::brpc_max_body_size;
brpc::FLAGS_socket_max_unwritten_bytes =
config::brpc_socket_max_unwritten_bytes;
@@ -238,19 +241,22 @@ int main(int argc, char** argv) {
return 1;
}
LOG(INFO) << "begin to init txn kv";
+ auto start_init_kv = steady_clock::now();
int ret = txn_kv->init();
if (ret != 0) {
LOG(WARNING) << "failed to init txnkv, ret=" << ret;
return 1;
}
- LOG(INFO) << "successfully init txn kv";
+ end = steady_clock::now();
+ LOG(INFO) << "successfully init txn kv, elapsed milliseconds: "
+ << duration_cast<milliseconds>(end - start_init_kv).count();
if (init_global_encryption_key_info_map(txn_kv.get()) != 0) {
LOG(WARNING) << "failed to init global encryption key map";
return -1;
}
- std::unique_ptr<MetaServer> meta_server;
+ std::unique_ptr<MetaServer> meta_server; // meta-service
std::unique_ptr<Recycler> recycler;
std::thread periodiccally_log_thread;
std::mutex periodiccally_log_thread_lock;
@@ -269,7 +275,8 @@ int main(int argc, char** argv) {
msg = "meta-service started";
LOG(INFO) << msg;
std::cout << msg << std::endl;
- } else if (args.get<bool>(ARG_RECYCLER)) {
+ }
+ if (args.get<bool>(ARG_RECYCLER)) {
recycler = std::make_unique<Recycler>(txn_kv);
int ret = recycler->start(&server);
if (ret != 0) {
@@ -284,15 +291,12 @@ int main(int argc, char** argv) {
auto periodiccally_log = [&]() {
while (periodiccally_log_thread_run) {
std::unique_lock<std::mutex> lck
{periodiccally_log_thread_lock};
- periodiccally_log_thread_cv.wait_for(
- lck,
std::chrono::milliseconds(config::periodically_log_ms));
+ periodiccally_log_thread_cv.wait_for(lck,
+
milliseconds(config::periodically_log_ms));
LOG(INFO) << "Periodically log for recycler";
}
};
periodiccally_log_thread = std::thread {periodiccally_log};
- } else {
- std::cerr << "cloud starts without doing anything and exits" <<
std::endl;
- return -1;
}
// start service
brpc::ServerOptions options;
@@ -309,7 +313,11 @@ int main(int argc, char** argv) {
<< ", errmsg=" << strerror_r(errno, buf, 64) << ", port="
<< port;
return -1;
}
- LOG(INFO) << "successfully started brpc listening on port=" << port;
+ end = steady_clock::now();
+ msg = "successfully started brpc listening on port=" +
std::to_string(port) +
+ " time_elapsed_ms=" + std::to_string(duration_cast<milliseconds>(end
- start).count());
+ LOG(INFO) << msg;
+ std::cout << msg << std::endl;
server.RunUntilAskedToQuit(); // Wait for signals
server.ClearServices();
@@ -326,7 +334,7 @@ int main(int argc, char** argv) {
periodiccally_log_thread_run = false;
// immediately notify the log thread to quickly exit in case it
block the
// whole procedure
- periodiccally_log_thread_cv.notify_one();
+ periodiccally_log_thread_cv.notify_all();
}
periodiccally_log_thread.join();
}
diff --git a/cloud/src/recycler/checker.cpp b/cloud/src/recycler/checker.cpp
index 49421f97ca0..c3e9f69ed9d 100644
--- a/cloud/src/recycler/checker.cpp
+++ b/cloud/src/recycler/checker.cpp
@@ -79,6 +79,8 @@ int Checker::start() {
// launch instance scanner
auto scanner_func = [this]() {
+ std::this_thread::sleep_for(
+
std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds));
while (!stopped()) {
std::vector<InstanceInfoPB> instances;
get_all_instances(txn_kv_.get(), instances);
diff --git a/cloud/src/recycler/recycler.cpp b/cloud/src/recycler/recycler.cpp
index 9db16a18c13..76d4a7ca767 100644
--- a/cloud/src/recycler/recycler.cpp
+++ b/cloud/src/recycler/recycler.cpp
@@ -189,6 +189,11 @@ Recycler::~Recycler() {
}
void Recycler::instance_scanner_callback() {
+ // sleep 60 seconds before scheduling for the launch procedure to complete:
+ // some bad hdfs connection may cause some log to stdout stderr
+ // which may pollute .out file and affect the script to check success
+ std::this_thread::sleep_for(
+
std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds));
while (!stopped()) {
std::vector<InstanceInfoPB> instances;
get_all_instances(txn_kv_.get(), instances);
diff --git a/cloud/src/recycler/recycler_service.cpp
b/cloud/src/recycler/recycler_service.cpp
index 3c1a5b2ab48..08e937a4106 100644
--- a/cloud/src/recycler/recycler_service.cpp
+++ b/cloud/src/recycler/recycler_service.cpp
@@ -448,7 +448,7 @@ void
RecyclerServiceImpl::http(::google::protobuf::RpcController* controller,
}
status_code = 404;
- msg = "not found";
+ msg = "http path " + uri.path() + " not found, it may be not implemented";
response_body = msg;
}
diff --git a/cloud/test/recycler_test.cpp b/cloud/test/recycler_test.cpp
index d767c1bd8b7..14687354839 100644
--- a/cloud/test/recycler_test.cpp
+++ b/cloud/test/recycler_test.cpp
@@ -64,6 +64,7 @@ int main(int argc, char** argv) {
using namespace std::chrono;
current_time =
duration_cast<seconds>(system_clock::now().time_since_epoch()).count();
+ config::recycler_sleep_before_scheduling_seconds = 0; // we dont have to
wait in UT
::testing::InitGoogleTest(&argc, argv);
auto s3_producer_pool =
std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]