This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new 97ce93ed58 [fix](create tablet) fix backend create tablet timeout (#23885)
97ce93ed58 is described below
commit 97ce93ed58d344b9e879e94cc2772bf8bc1fe0e2
Author: yujun <[email protected]>
AuthorDate: Sun Sep 10 11:13:38 2023 +0800
[fix](create tablet) fix backend create tablet timeout (#23885)
---
be/src/olap/tablet.cpp | 1 +
be/src/olap/tablet_manager.cpp | 308 ++++++++++++++++++++---------------------
be/src/olap/tablet_manager.h | 11 +-
3 files changed, 157 insertions(+), 163 deletions(-)
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 968b6ad9d4..98ef536d9f 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2404,6 +2404,7 @@ void Tablet::update_self_owned_remote_rowsets(
}
bool Tablet::check_all_rowset_segment() {
+ std::shared_lock rdlock(_meta_lock);
for (auto& version_rowset : _rs_version_map) {
RowsetSharedPtr rowset = version_rowset.second;
if (!rowset->check_rowset_segment()) {
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index a523ec620d..14c2489c24 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -599,36 +599,41 @@ TabletSharedPtr TabletManager::get_tablet(TTabletId
tablet_id, TabletUid tablet_
std::vector<TabletSharedPtr>
TabletManager::get_all_tablet(std::function<bool(Tablet*)>&& filter) {
std::vector<TabletSharedPtr> res;
+ for_each_tablet([&](const TabletSharedPtr& tablet) {
res.emplace_back(tablet); },
+ std::move(filter));
+ return res;
+}
+
+void TabletManager::for_each_tablet(std::function<void(const
TabletSharedPtr&)>&& handler,
+ std::function<bool(Tablet*)>&& filter) {
+ std::vector<TabletSharedPtr> tablets;
for (const auto& tablets_shard : _tablets_shards) {
- std::shared_lock rdlock(tablets_shard.lock);
- for (auto& [id, tablet] : tablets_shard.tablet_map) {
- if (filter(tablet.get())) {
- res.emplace_back(tablet);
+ tablets.clear();
+ {
+ std::shared_lock rdlock(tablets_shard.lock);
+ for (const auto& [id, tablet] : tablets_shard.tablet_map) {
+ if (filter(tablet.get())) {
+ tablets.emplace_back(tablet);
+ }
}
}
+ for (const auto& tablet : tablets) {
+ handler(tablet);
+ }
}
- return res;
}
uint64_t TabletManager::get_rowset_nums() {
uint64_t rowset_nums = 0;
- for (const auto& tablets_shard : _tablets_shards) {
- std::shared_lock rdlock(tablets_shard.lock);
- for (const auto& tablet_map : tablets_shard.tablet_map) {
- rowset_nums += tablet_map.second->version_count();
- }
- }
+ for_each_tablet([&](const TabletSharedPtr& tablet) { rowset_nums +=
tablet->version_count(); },
+ filter_all_tablets);
return rowset_nums;
}
uint64_t TabletManager::get_segment_nums() {
uint64_t segment_nums = 0;
- for (const auto& tablets_shard : _tablets_shards) {
- std::shared_lock rdlock(tablets_shard.lock);
- for (const auto& tablet_map : tablets_shard.tablet_map) {
- segment_nums += tablet_map.second->segment_count();
- }
- }
+ for_each_tablet([&](const TabletSharedPtr& tablet) { segment_nums +=
tablet->segment_count(); },
+ filter_all_tablets);
return segment_nums;
}
@@ -686,63 +691,60 @@ TabletSharedPtr
TabletManager::find_best_tablet_to_compaction(
uint32_t compaction_score = 0;
double tablet_scan_frequency = 0.0;
TabletSharedPtr best_tablet;
- for (const auto& tablets_shard : _tablets_shards) {
- std::shared_lock rdlock(tablets_shard.lock);
- for (const auto& tablet_map : tablets_shard.tablet_map) {
- const TabletSharedPtr& tablet_ptr = tablet_map.second;
- if (!tablet_ptr->can_do_compaction(data_dir->path_hash(),
compaction_type)) {
- continue;
- }
+ auto handler = [&](const TabletSharedPtr& tablet_ptr) {
+ if (!tablet_ptr->can_do_compaction(data_dir->path_hash(),
compaction_type)) {
+ return;
+ }
- auto search =
tablet_submitted_compaction.find(tablet_ptr->tablet_id());
- if (search != tablet_submitted_compaction.end()) {
- continue;
- }
+ auto search =
tablet_submitted_compaction.find(tablet_ptr->tablet_id());
+ if (search != tablet_submitted_compaction.end()) {
+ return;
+ }
- int64_t last_failure_ms =
tablet_ptr->last_cumu_compaction_failure_time();
- if (compaction_type == CompactionType::BASE_COMPACTION) {
- last_failure_ms =
tablet_ptr->last_base_compaction_failure_time();
- }
- if (now_ms - last_failure_ms <=
config::min_compaction_failure_interval_sec * 1000) {
- continue;
- }
+ int64_t last_failure_ms =
tablet_ptr->last_cumu_compaction_failure_time();
+ if (compaction_type == CompactionType::BASE_COMPACTION) {
+ last_failure_ms = tablet_ptr->last_base_compaction_failure_time();
+ }
+ if (now_ms - last_failure_ms <=
config::min_compaction_failure_interval_sec * 1000) {
+ return;
+ }
- if (compaction_type == CompactionType::BASE_COMPACTION) {
- std::unique_lock<std::mutex>
lock(tablet_ptr->get_base_compaction_lock(),
- std::try_to_lock);
- if (!lock.owns_lock()) {
- LOG(INFO) << "can not get base lock: " <<
tablet_ptr->tablet_id();
- continue;
- }
- } else {
- std::unique_lock<std::mutex>
lock(tablet_ptr->get_cumulative_compaction_lock(),
- std::try_to_lock);
- if (!lock.owns_lock()) {
- LOG(INFO) << "can not get cumu lock: " <<
tablet_ptr->tablet_id();
- continue;
- }
+ if (compaction_type == CompactionType::BASE_COMPACTION) {
+ std::unique_lock<std::mutex>
lock(tablet_ptr->get_base_compaction_lock(),
+ std::try_to_lock);
+ if (!lock.owns_lock()) {
+ LOG(INFO) << "can not get base lock: " <<
tablet_ptr->tablet_id();
+ return;
+ }
+ } else {
+ std::unique_lock<std::mutex>
lock(tablet_ptr->get_cumulative_compaction_lock(),
+ std::try_to_lock);
+ if (!lock.owns_lock()) {
+ LOG(INFO) << "can not get cumu lock: " <<
tablet_ptr->tablet_id();
+ return;
}
+ }
- uint32_t current_compaction_score =
tablet_ptr->calc_compaction_score(
- compaction_type, cumulative_compaction_policy);
+ uint32_t current_compaction_score =
+ tablet_ptr->calc_compaction_score(compaction_type,
cumulative_compaction_policy);
- double scan_frequency = 0.0;
- if (config::compaction_tablet_scan_frequency_factor != 0) {
- scan_frequency = tablet_ptr->calculate_scan_frequency();
- }
+ double scan_frequency = 0.0;
+ if (config::compaction_tablet_scan_frequency_factor != 0) {
+ scan_frequency = tablet_ptr->calculate_scan_frequency();
+ }
- double tablet_score =
- config::compaction_tablet_scan_frequency_factor *
scan_frequency +
- config::compaction_tablet_compaction_score_factor *
current_compaction_score;
- if (tablet_score > highest_score) {
- highest_score = tablet_score;
- compaction_score = current_compaction_score;
- tablet_scan_frequency = scan_frequency;
- best_tablet = tablet_ptr;
- }
+ double tablet_score =
+ config::compaction_tablet_scan_frequency_factor *
scan_frequency +
+ config::compaction_tablet_compaction_score_factor *
current_compaction_score;
+ if (tablet_score > highest_score) {
+ highest_score = tablet_score;
+ compaction_score = current_compaction_score;
+ tablet_scan_frequency = scan_frequency;
+ best_tablet = tablet_ptr;
}
- }
+ };
+ for_each_tablet(handler, filter_all_tablets);
if (best_tablet != nullptr) {
VLOG_CRITICAL << "Found the best tablet for compaction. "
<< "compaction_type=" << compaction_type_str
@@ -913,33 +915,31 @@ Status
TabletManager::build_all_report_tablets_info(std::map<TTabletId, TTablet>
DorisMetrics::instance()->report_all_tablets_requests_total->increment(1);
HistogramStat tablet_version_num_hist;
auto local_cache = std::make_shared<std::vector<TTabletStat>>();
- for (const auto& tablets_shard : _tablets_shards) {
- std::shared_lock rdlock(tablets_shard.lock);
- for (const auto& item : tablets_shard.tablet_map) {
- uint64_t tablet_id = item.first;
- TabletSharedPtr tablet_ptr = item.second;
- TTablet t_tablet;
- TTabletInfo tablet_info;
- tablet_ptr->build_tablet_report_info(&tablet_info, true);
- // find expired transaction corresponding to this tablet
- TabletInfo tinfo(tablet_id, tablet_ptr->schema_hash(),
tablet_ptr->tablet_uid());
- auto find = expire_txn_map.find(tinfo);
- if (find != expire_txn_map.end()) {
- tablet_info.__set_transaction_ids(find->second);
- expire_txn_map.erase(find);
- }
- t_tablet.tablet_infos.push_back(tablet_info);
- tablet_version_num_hist.add(tablet_ptr->version_count());
- tablets_info->emplace(tablet_id, t_tablet);
- TTabletStat t_tablet_stat;
- t_tablet_stat.__set_tablet_id(tablet_info.tablet_id);
- t_tablet_stat.__set_data_size(tablet_info.data_size);
- t_tablet_stat.__set_remote_data_size(tablet_info.remote_data_size);
- t_tablet_stat.__set_row_num(tablet_info.row_count);
- t_tablet_stat.__set_version_count(tablet_info.version_count);
- local_cache->emplace_back(std::move(t_tablet_stat));
+ auto handler = [&](const TabletSharedPtr& tablet_ptr) {
+ uint64_t tablet_id = tablet_ptr->tablet_id();
+ TTablet t_tablet;
+ TTabletInfo tablet_info;
+ tablet_ptr->build_tablet_report_info(&tablet_info, true);
+ // find expired transaction corresponding to this tablet
+ TabletInfo tinfo(tablet_id, tablet_ptr->schema_hash(),
tablet_ptr->tablet_uid());
+ auto find = expire_txn_map.find(tinfo);
+ if (find != expire_txn_map.end()) {
+ tablet_info.__set_transaction_ids(find->second);
+ expire_txn_map.erase(find);
}
- }
+ t_tablet.tablet_infos.push_back(tablet_info);
+ tablet_version_num_hist.add(tablet_ptr->version_count());
+ tablets_info->emplace(tablet_id, t_tablet);
+ TTabletStat t_tablet_stat;
+ t_tablet_stat.__set_tablet_id(tablet_info.tablet_id);
+ t_tablet_stat.__set_data_size(tablet_info.data_size);
+ t_tablet_stat.__set_remote_data_size(tablet_info.remote_data_size);
+ t_tablet_stat.__set_row_num(tablet_info.row_count);
+ t_tablet_stat.__set_version_count(tablet_info.version_count);
+ local_cache->emplace_back(std::move(t_tablet_stat));
+ };
+ for_each_tablet(handler, filter_all_tablets);
+
{
std::lock_guard<std::mutex> guard(_tablet_stat_cache_mutex);
_tablet_stat_list_cache = local_cache;
@@ -953,23 +953,9 @@ Status
TabletManager::build_all_report_tablets_info(std::map<TTabletId, TTablet>
Status TabletManager::start_trash_sweep() {
SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
{
- std::vector<TabletSharedPtr>
- all_tablets; // we use this vector to save all tablet ptr for
saving lock time.
- for (auto& tablets_shard : _tablets_shards) {
- tablet_map_t& tablet_map = tablets_shard.tablet_map;
- {
- std::shared_lock rdlock(tablets_shard.lock);
- for (auto& item : tablet_map) {
- // try to clean empty item
- all_tablets.push_back(item.second);
- }
- }
- // Avoid hold the shard lock too long, so we get tablet to a
vector and clean here
- for (const auto& tablet : all_tablets) {
- tablet->delete_expired_stale_rowset();
- }
- all_tablets.clear();
- }
+ for_each_tablet(
+ [](const TabletSharedPtr& tablet) {
tablet->delete_expired_stale_rowset(); },
+ filter_all_tablets);
}
int32_t clean_num = 0;
@@ -1130,24 +1116,13 @@ void
TabletManager::get_partition_related_tablets(int64_t partition_id,
void TabletManager::do_tablet_meta_checkpoint(DataDir* data_dir) {
SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
- std::vector<TabletSharedPtr> related_tablets;
- {
- for (auto& tablets_shard : _tablets_shards) {
- std::shared_lock rdlock(tablets_shard.lock);
- for (auto& item : tablets_shard.tablet_map) {
- TabletSharedPtr& tablet_ptr = item.second;
- if (tablet_ptr->tablet_state() != TABLET_RUNNING) {
- continue;
- }
+ auto filter = [data_dir](Tablet* tablet) -> bool {
+ return tablet->tablet_state() == TABLET_RUNNING &&
+ tablet->data_dir()->path_hash() == data_dir->path_hash() &&
tablet->is_used() &&
+ tablet->init_succeeded();
+ };
- if (tablet_ptr->data_dir()->path_hash() !=
data_dir->path_hash() ||
- !tablet_ptr->is_used() || !tablet_ptr->init_succeeded()) {
- continue;
- }
- related_tablets.push_back(tablet_ptr);
- }
- }
- }
+ std::vector<TabletSharedPtr> related_tablets = get_all_tablet(filter);
int counter = 0;
MonotonicStopWatch watch;
watch.start();
@@ -1318,17 +1293,14 @@ struct SortCtx {
void TabletManager::get_cooldown_tablets(std::vector<TabletSharedPtr>*
tablets) {
std::vector<SortCtx> sort_ctx_vec;
- for (const auto& tablets_shard : _tablets_shards) {
- std::shared_lock rdlock(tablets_shard.lock);
- for (const auto& item : tablets_shard.tablet_map) {
- const TabletSharedPtr& tablet = item.second;
- int64_t cooldown_timestamp = -1;
- size_t file_size = -1;
- if (tablet->need_cooldown(&cooldown_timestamp, &file_size)) {
- sort_ctx_vec.emplace_back(tablet, cooldown_timestamp,
file_size);
- }
+ auto handler = [&](const TabletSharedPtr& tablet) {
+ int64_t cooldown_timestamp = -1;
+ size_t file_size = -1;
+ if (tablet->need_cooldown(&cooldown_timestamp, &file_size)) {
+ sort_ctx_vec.emplace_back(tablet, cooldown_timestamp, file_size);
}
- }
+ };
+ for_each_tablet(handler, filter_all_tablets);
std::sort(sort_ctx_vec.begin(), sort_ctx_vec.end(), [](SortCtx a, SortCtx
b) {
if (a.cooldown_timestamp != -1 && b.cooldown_timestamp != -1) {
@@ -1354,44 +1326,58 @@ void
TabletManager::get_cooldown_tablets(std::vector<TabletSharedPtr>* tablets)
void TabletManager::get_all_tablets_storage_format(TCheckStorageFormatResult*
result) {
DCHECK(result != nullptr);
- for (const auto& tablets_shard : _tablets_shards) {
- std::shared_lock rdlock(tablets_shard.lock);
- for (const auto& item : tablets_shard.tablet_map) {
- uint64_t tablet_id = item.first;
- if (item.second->all_beta()) {
- result->v2_tablets.push_back(tablet_id);
- } else {
- result->v1_tablets.push_back(tablet_id);
- }
+ auto handler = [result](const TabletSharedPtr& tablet) {
+ if (tablet->all_beta()) {
+ result->v2_tablets.push_back(tablet->tablet_id());
+ } else {
+ result->v1_tablets.push_back(tablet->tablet_id());
}
- }
+ };
+
+ for_each_tablet(handler, filter_all_tablets);
result->__isset.v1_tablets = true;
result->__isset.v2_tablets = true;
}
std::set<int64_t> TabletManager::check_all_tablet_segment(bool repair) {
std::set<int64_t> bad_tablets;
- for (const auto& tablets_shard : _tablets_shards) {
+ std::map<int64_t, std::vector<int64_t>> repair_shard_bad_tablets;
+ auto handler = [&](const TabletSharedPtr& tablet) {
+ if (!tablet->check_all_rowset_segment()) {
+ int64_t tablet_id = tablet->tablet_id();
+ bad_tablets.insert(tablet_id);
+ if (repair) {
+ repair_shard_bad_tablets[tablet_id &
_tablets_shards_mask].push_back(tablet_id);
+ }
+ }
+ };
+ for_each_tablet(handler, filter_all_tablets);
+
+ for (const auto& [shard_index, shard_tablets] : repair_shard_bad_tablets) {
+ auto& tablets_shard = _tablets_shards[shard_index];
+ auto& tablet_map = tablets_shard.tablet_map;
std::lock_guard<std::shared_mutex> wrlock(tablets_shard.lock);
- for (const auto& item : tablets_shard.tablet_map) {
- TabletSharedPtr tablet = item.second;
- if (!tablet->check_all_rowset_segment()) {
- bad_tablets.insert(tablet->tablet_id());
- if (repair) {
- tablet->set_tablet_state(TABLET_SHUTDOWN);
- tablet->save_meta();
- {
- std::lock_guard<std::shared_mutex>
shutdown_tablets_wrlock(
- _shutdown_tablets_lock);
- _shutdown_tablets.push_back(tablet);
- }
- LOG(WARNING) << "There are some segments lost, set tablet
to shutdown state."
- << "tablet_id=" << tablet->tablet_id()
- << ", tablet_path=" << tablet->tablet_path();
+ for (auto tablet_id : shard_tablets) {
+ auto it = tablet_map.find(tablet_id);
+ if (it == tablet_map.end()) {
+ bad_tablets.erase(tablet_id);
+ LOG(WARNING) << "Bad tablet has be removed. tablet_id=" <<
tablet_id;
+ } else {
+ const auto& tablet = it->second;
+ tablet->set_tablet_state(TABLET_SHUTDOWN);
+ tablet->save_meta();
+ {
+ std::lock_guard<std::shared_mutex> shutdown_tablets_wrlock(
+ _shutdown_tablets_lock);
+ _shutdown_tablets.push_back(tablet);
}
+ LOG(WARNING) << "There are some segments lost, set tablet to
shutdown state."
+ << "tablet_id=" << tablet->tablet_id()
+ << ", tablet_path=" << tablet->tablet_path();
}
}
}
+
return bad_tablets;
}
diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h
index 378ff499ac..b0b5914441 100644
--- a/be/src/olap/tablet_manager.h
+++ b/be/src/olap/tablet_manager.h
@@ -78,8 +78,15 @@ public:
TabletSharedPtr get_tablet(TTabletId tablet_id, TabletUid tablet_uid,
bool include_deleted = false, std::string* err
= nullptr);
- std::vector<TabletSharedPtr> get_all_tablet(std::function<bool(Tablet*)>&&
filter =
- [](Tablet* t) { return
t->is_used(); });
+ std::vector<TabletSharedPtr> get_all_tablet(
+ std::function<bool(Tablet*)>&& filter = filter_used_tablets);
+
+ // Handler not hold the shard lock.
+ void for_each_tablet(std::function<void(const TabletSharedPtr&)>&& handler,
+ std::function<bool(Tablet*)>&& filter =
filter_used_tablets);
+
+ static bool filter_all_tablets(Tablet* tablet) { return true; }
+ static bool filter_used_tablets(Tablet* tablet) { return
tablet->is_used(); }
uint64_t get_rowset_nums();
uint64_t get_segment_nums();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]