This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 6a33e4639af [schedule](pipeline) Remove wait schedule time in pipeline query engine and change current queue to std::mutex (#24525)
6a33e4639af is described below
commit 6a33e4639afe687923ecb75e1c42a7f96f033772
Author: HappenLee <[email protected]>
AuthorDate: Mon Sep 18 23:57:56 2023 +0800
[schedule](pipeline) Remove wait schedule time in pipeline query engine and change current queue to std::mutex (#24525)
This reverts commit 591aeaa98d1178e2e277278c7afeafef9bdb88d6.
---
be/src/pipeline/pipeline_task.cpp | 2 --
be/src/pipeline/pipeline_task.h | 4 ----
be/src/pipeline/task_scheduler.cpp | 31 ++++++++++---------------
be/src/pipeline/task_scheduler.h | 1 -
be/src/vec/exec/scan/pip_scanner_context.h | 37 +++++++++++++++++++++---------
5 files changed, 38 insertions(+), 37 deletions(-)
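The second half of the patch, in pip_scanner_context.h, replaces the lock-free moodycamel::ConcurrentQueue with a plain std::list guarded by one std::mutex per scan-instance queue, so the emptiness check and the dequeue happen atomically under a single lock and the queue size is exact rather than approximate. A minimal standalone sketch of that pattern follows; the names and types are simplified stand-ins, not the real Doris members.

#include <list>
#include <memory>
#include <mutex>
#include <vector>

struct Block {};
using BlockUPtr = std::unique_ptr<Block>;

struct PerInstanceQueues {
    std::vector<std::unique_ptr<std::mutex>> queue_mutexes; // one mutex per queue
    std::vector<std::list<BlockUPtr>> block_queues;         // one FIFO per scan instance

    explicit PerInstanceQueues(int n) : block_queues(n) {
        for (int i = 0; i < n; ++i) {
            queue_mutexes.emplace_back(std::make_unique<std::mutex>());
        }
    }

    // Producer side: append under the queue's own lock.
    void push(int id, BlockUPtr block) {
        std::lock_guard<std::mutex> l(*queue_mutexes[id]);
        block_queues[id].emplace_back(std::move(block));
    }

    // Consumer side: check-and-pop atomically under the same lock; false means "empty".
    bool pop(int id, BlockUPtr* out) {
        std::lock_guard<std::mutex> l(*queue_mutexes[id]);
        if (block_queues[id].empty()) {
            return false;
        }
        *out = std::move(block_queues[id].front());
        block_queues[id].pop_front();
        return true;
    }
};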
diff --git a/be/src/pipeline/pipeline_task.cpp b/be/src/pipeline/pipeline_task.cpp
index bb27012b3d4..add92cabf37 100644
--- a/be/src/pipeline/pipeline_task.cpp
+++ b/be/src/pipeline/pipeline_task.cpp
@@ -86,7 +86,6 @@ void PipelineTask::_fresh_profile_counter() {
COUNTER_SET(_schedule_counts, (int64_t)_schedule_time);
COUNTER_SET(_wait_sink_timer, (int64_t)_wait_sink_watcher.elapsed_time());
COUNTER_SET(_wait_worker_timer, (int64_t)_wait_worker_watcher.elapsed_time());
- COUNTER_SET(_wait_schedule_timer, (int64_t)_wait_schedule_watcher.elapsed_time());
COUNTER_SET(_begin_execute_timer, _begin_execute_time);
COUNTER_SET(_eos_timer, _eos_time);
COUNTER_SET(_src_pending_finish_over_timer, _src_pending_finish_over_time);
@@ -117,7 +116,6 @@ void PipelineTask::_init_profile() {
_wait_bf_timer = ADD_TIMER(_task_profile, "WaitBfTime");
_wait_sink_timer = ADD_TIMER(_task_profile, "WaitSinkTime");
_wait_worker_timer = ADD_TIMER(_task_profile, "WaitWorkerTime");
- _wait_schedule_timer = ADD_TIMER(_task_profile, "WaitScheduleTime");
_block_counts = ADD_COUNTER(_task_profile, "NumBlockedTimes", TUnit::UNIT);
_block_by_source_counts = ADD_COUNTER(_task_profile, "NumBlockedBySrcTimes", TUnit::UNIT);
_block_by_sink_counts = ADD_COUNTER(_task_profile, "NumBlockedBySinkTimes", TUnit::UNIT);
diff --git a/be/src/pipeline/pipeline_task.h b/be/src/pipeline/pipeline_task.h
index 4311c48f325..b8b8e89215f 100644
--- a/be/src/pipeline/pipeline_task.h
+++ b/be/src/pipeline/pipeline_task.h
@@ -133,8 +133,6 @@ public:
_wait_worker_watcher.start();
}
void pop_out_runnable_queue() { _wait_worker_watcher.stop(); }
- void start_schedule_watcher() { _wait_schedule_watcher.start(); }
- void stop_schedule_watcher() { _wait_schedule_watcher.stop(); }
PipelineTaskState get_state() { return _cur_state; }
void set_state(PipelineTaskState state);
@@ -311,8 +309,6 @@ protected:
MonotonicStopWatch _wait_worker_watcher;
RuntimeProfile::Counter* _wait_worker_timer;
// TODO we should calculate the time between when really runnable and runnable
- MonotonicStopWatch _wait_schedule_watcher;
- RuntimeProfile::Counter* _wait_schedule_timer;
RuntimeProfile::Counter* _yield_counts;
RuntimeProfile::Counter* _core_change_times;
diff --git a/be/src/pipeline/task_scheduler.cpp b/be/src/pipeline/task_scheduler.cpp
index e4a4ec38af9..c4278c38077 100644
--- a/be/src/pipeline/task_scheduler.cpp
+++ b/be/src/pipeline/task_scheduler.cpp
@@ -85,7 +85,6 @@ void BlockedTaskScheduler::_schedule() {
_started.store(true);
std::list<PipelineTask*> local_blocked_tasks;
int empty_times = 0;
- std::vector<PipelineTask*> ready_tasks;
while (!_shutdown) {
{
@@ -105,6 +104,7 @@ void BlockedTaskScheduler::_schedule() {
}
}
+ auto origin_local_block_tasks_size = local_blocked_tasks.size();
auto iter = local_blocked_tasks.begin();
vectorized::VecDateTimeValue now = vectorized::VecDateTimeValue::local_time();
while (iter != local_blocked_tasks.end()) {
@@ -116,57 +116,52 @@ void BlockedTaskScheduler::_schedule() {
VLOG_DEBUG << "Task pending" << task->debug_string();
iter++;
} else {
- _make_task_run(local_blocked_tasks, iter, ready_tasks,
- PipelineTaskState::PENDING_FINISH);
+ _make_task_run(local_blocked_tasks, iter, PipelineTaskState::PENDING_FINISH);
}
} else if (task->query_context()->is_cancelled()) {
- _make_task_run(local_blocked_tasks, iter, ready_tasks);
+ _make_task_run(local_blocked_tasks, iter);
} else if (task->query_context()->is_timeout(now)) {
LOG(WARNING) << "Timeout, query_id=" << print_id(task->query_context()->query_id())
<< ", instance_id=" << print_id(task->instance_id())
<< ", task info: " << task->debug_string();
task->query_context()->cancel(true, "", Status::Cancelled(""));
- _make_task_run(local_blocked_tasks, iter, ready_tasks);
+ _make_task_run(local_blocked_tasks, iter);
} else if (state == PipelineTaskState::BLOCKED_FOR_DEPENDENCY) {
if (task->has_dependency()) {
iter++;
} else {
- _make_task_run(local_blocked_tasks, iter, ready_tasks);
+ _make_task_run(local_blocked_tasks, iter);
}
} else if (state == PipelineTaskState::BLOCKED_FOR_SOURCE) {
if (task->source_can_read()) {
- _make_task_run(local_blocked_tasks, iter, ready_tasks);
+ _make_task_run(local_blocked_tasks, iter);
} else {
iter++;
}
} else if (state == PipelineTaskState::BLOCKED_FOR_RF) {
if (task->runtime_filters_are_ready_or_timeout()) {
- _make_task_run(local_blocked_tasks, iter, ready_tasks);
+ _make_task_run(local_blocked_tasks, iter);
} else {
iter++;
}
} else if (state == PipelineTaskState::BLOCKED_FOR_SINK) {
if (task->sink_can_write()) {
- _make_task_run(local_blocked_tasks, iter, ready_tasks);
+ _make_task_run(local_blocked_tasks, iter);
} else {
iter++;
}
} else {
// TODO: DCHECK the state
- _make_task_run(local_blocked_tasks, iter, ready_tasks);
+ _make_task_run(local_blocked_tasks, iter);
}
}
- if (ready_tasks.empty()) {
+ if (origin_local_block_tasks_size == 0 ||
+ local_blocked_tasks.size() == origin_local_block_tasks_size) {
empty_times += 1;
} else {
empty_times = 0;
- for (auto& task : ready_tasks) {
- task->stop_schedule_watcher();
- _task_queue->push_back(task);
- }
- ready_tasks.clear();
}
if (empty_times != 0 && (empty_times & (EMPTY_TIMES_TO_YIELD - 1)) == 0) {
@@ -186,13 +181,11 @@ void BlockedTaskScheduler::_schedule() {
void BlockedTaskScheduler::_make_task_run(std::list<PipelineTask*>& local_tasks,
std::list<PipelineTask*>::iterator& task_itr,
- std::vector<PipelineTask*>& ready_tasks,
PipelineTaskState t_state) {
auto task = *task_itr;
- task->start_schedule_watcher();
task->set_state(t_state);
local_tasks.erase(task_itr++);
- ready_tasks.emplace_back(task);
+ _task_queue->push_back(task);
}
TaskScheduler::~TaskScheduler() {
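Two things change in the blocked-task scheduler above: _make_task_run now pushes a ready task straight into _task_queue instead of buffering it in a local ready_tasks vector, and "did this sweep make progress?" is detected by comparing local_blocked_tasks.size() before and after the sweep. The backoff still uses the bitmask test against EMPTY_TIMES_TO_YIELD, which only behaves like a modulo when that constant is a power of two. Below is a simplified, self-contained sketch of that loop shape; sweep_once(), the constant's value, and the sched_yield() backoff are placeholders here, not the exact Doris code.

#include <atomic>
#include <list>
#include <sched.h>

struct Task {};
constexpr int EMPTY_TIMES_TO_YIELD = 64;   // assumed power of two, as the bitmask test requires

// Placeholder for the real sweep: the actual code moves runnable tasks out of
// `blocked` and pushes them to the worker queue inside _make_task_run.
void sweep_once(std::list<Task*>& blocked) {
    (void)blocked;
}

void schedule_loop(std::list<Task*>& blocked, const std::atomic<bool>& shutdown) {
    int empty_times = 0;
    while (!shutdown.load()) {
        auto origin_size = blocked.size();
        sweep_once(blocked);
        if (origin_size == 0 || blocked.size() == origin_size) {
            empty_times += 1;              // no task left the blocked list this round
        } else {
            empty_times = 0;               // progress was made; reset the backoff counter
        }
        // Power-of-two trick: (x & (N - 1)) == 0  <=>  x % N == 0 when N is 2^k.
        if (empty_times != 0 && (empty_times & (EMPTY_TIMES_TO_YIELD - 1)) == 0) {
            sched_yield();                 // back off when the loop keeps spinning idle
        }
    }
}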
diff --git a/be/src/pipeline/task_scheduler.h b/be/src/pipeline/task_scheduler.h
index b9d3dfbac3c..13b9e734d69 100644
--- a/be/src/pipeline/task_scheduler.h
+++ b/be/src/pipeline/task_scheduler.h
@@ -71,7 +71,6 @@ private:
void _schedule();
void _make_task_run(std::list<PipelineTask*>& local_tasks,
std::list<PipelineTask*>::iterator& task_itr,
- std::vector<PipelineTask*>& ready_tasks,
PipelineTaskState state = PipelineTaskState::RUNNABLE);
};
diff --git a/be/src/vec/exec/scan/pip_scanner_context.h b/be/src/vec/exec/scan/pip_scanner_context.h
index 0c11314fb2d..09ec0e3a553 100644
--- a/be/src/vec/exec/scan/pip_scanner_context.h
+++ b/be/src/vec/exec/scan/pip_scanner_context.h
@@ -70,12 +70,17 @@ public:
}
{
- if (!_blocks_queues[id].try_dequeue(*block)) {
+ std::unique_lock<std::mutex> l(*_queue_mutexs[id]);
+ if (_blocks_queues[id].empty()) {
*eos = _is_finished || _should_stop;
return Status::OK();
- }
- if (_blocks_queues[id].size_approx() == 0 && _data_dependency) {
- _data_dependency->block_reading();
+ } else {
+ *block = std::move(_blocks_queues[id].front());
+ _blocks_queues[id].pop_front();
+
+ if (_blocks_queues[id].empty() && _data_dependency) {
+ _data_dependency->block_reading();
+ }
}
}
_current_used_bytes -= (*block)->allocated_bytes();
@@ -133,8 +138,9 @@ public:
for (int i = 0; i < queue_size && i < block_size; ++i) {
int queue = _next_queue_to_feed;
{
+ std::lock_guard<std::mutex> l(*_queue_mutexs[queue]);
for (int j = i; j < block_size; j += queue_size) {
- _blocks_queues[queue].enqueue(std::move(blocks[j]));
+ _blocks_queues[queue].emplace_back(std::move(blocks[j]));
}
}
_next_queue_to_feed = queue + 1 < queue_size ? queue + 1 : 0;
@@ -146,11 +152,15 @@ public:
_current_used_bytes += local_bytes;
}
- bool empty_in_queue(int id) override { return _blocks_queues[id].size_approx() == 0; }
+ bool empty_in_queue(int id) override {
+ std::unique_lock<std::mutex> l(*_queue_mutexs[id]);
+ return _blocks_queues[id].empty();
+ }
Status init() override {
for (int i = 0; i < _num_parallel_instances; ++i) {
- _blocks_queues.emplace_back(moodycamel::ConcurrentQueue<vectorized::BlockUPtr>());
+ _queue_mutexs.emplace_back(std::make_unique<std::mutex>());
+ _blocks_queues.emplace_back(std::list<vectorized::BlockUPtr>());
}
RETURN_IF_ERROR(ScannerContext::init());
if (_need_colocate_distribute) {
@@ -182,9 +192,10 @@ public:
void _dispose_coloate_blocks_not_in_queue() override {
if (_need_colocate_distribute) {
for (int i = 0; i < _num_parallel_instances; ++i) {
+ std::scoped_lock s(*_colocate_block_mutexs[i], *_queue_mutexs[i]);
if (_colocate_blocks[i] && !_colocate_blocks[i]->empty()) {
_current_used_bytes += _colocate_blocks[i]->allocated_bytes();
- _blocks_queues[i].enqueue(std::move(_colocate_blocks[i]));
+ _blocks_queues[i].emplace_back(std::move(_colocate_blocks[i]));
_colocate_mutable_blocks[i]->clear();
}
if (_data_dependency) {
@@ -198,14 +209,15 @@ public:
auto res = ScannerContext::debug_string();
for (int i = 0; i < _blocks_queues.size(); ++i) {
res += " queue " + std::to_string(i) + ":size " +
- std::to_string(_blocks_queues[i].size_approx());
+ std::to_string(_blocks_queues[i].size());
}
return res;
}
private:
int _next_queue_to_feed = 0;
- std::vector<moodycamel::ConcurrentQueue<vectorized::BlockUPtr>> _blocks_queues;
+ std::vector<std::unique_ptr<std::mutex>> _queue_mutexs;
+ std::vector<std::list<vectorized::BlockUPtr>> _blocks_queues;
std::atomic_int64_t _current_used_bytes = 0;
const std::vector<int>& _col_distribute_ids;
@@ -238,7 +250,10 @@ private:
if (row_add == max_add) {
_current_used_bytes += _colocate_blocks[loc]->allocated_bytes();
- _blocks_queues[loc].enqueue(std::move(_colocate_blocks[loc]));
+ {
+ std::lock_guard<std::mutex> queue_l(*_queue_mutexs[loc]);
+ _blocks_queues[loc].emplace_back(std::move(_colocate_blocks[loc]));
+ }
if (_data_dependency) {
_data_dependency->set_ready_for_read();
}
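One detail worth noting in _dispose_coloate_blocks_not_in_queue above: with two mutexes now involved (the colocate-block mutex and the new queue mutex), the patch takes both through a single std::scoped_lock, which acquires multiple mutexes with a deadlock-avoidance algorithm so the colocate buffer and its destination queue are updated together. A small sketch of that idiom, with simplified stand-in members rather than the real Doris ones:

#include <list>
#include <memory>
#include <mutex>

struct Block {};

std::mutex colocate_block_mutex;                   // guards the per-instance colocate buffer
std::mutex queue_mutex;                            // guards the per-instance block queue
std::list<std::unique_ptr<Block>> colocate_block;  // stand-in for _colocate_blocks[i]
std::list<std::unique_ptr<Block>> block_queue;     // stand-in for _blocks_queues[i]

void flush_colocate_block() {
    // C++17 std::scoped_lock locks both mutexes without risking deadlock,
    // regardless of the order other threads acquire them in.
    std::scoped_lock lock(colocate_block_mutex, queue_mutex);
    while (!colocate_block.empty()) {
        block_queue.emplace_back(std::move(colocate_block.front()));
        colocate_block.pop_front();
    }
}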
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]