This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new d501d05de17 branch-3.0: [fix](restore) Lock tablet before modify
segment files #45711 (#48048)
d501d05de17 is described below
commit d501d05de176673ff6923d448defd807537f7af5
Author: walter <[email protected]>
AuthorDate: Wed Feb 19 19:24:54 2025 +0800
branch-3.0: [fix](restore) Lock tablet before modify segment files #45711
(#48048)
cherry pick from #45711
---
be/src/olap/tablet.cpp | 4 +-
be/src/olap/tablet.h | 5 +-
be/src/runtime/snapshot_loader.cpp | 99 +++++++++++++++++++++++---------------
3 files changed, 64 insertions(+), 44 deletions(-)
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index d1d48b91ff3..217990315b1 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2717,7 +2717,7 @@ void Tablet::check_table_size_correctness() {
const std::vector<RowsetMetaSharedPtr>& all_rs_metas =
_tablet_meta->all_rs_metas();
for (const auto& rs_meta : all_rs_metas) {
int64_t total_segment_size = get_segment_file_size(rs_meta);
- int64_t total_inverted_index_size =
get_inverted_index_file_szie(rs_meta);
+ int64_t total_inverted_index_size =
get_inverted_index_file_size(rs_meta);
if (rs_meta->data_disk_size() != total_segment_size ||
rs_meta->index_disk_size() != total_inverted_index_size ||
rs_meta->data_disk_size() + rs_meta->index_disk_size() !=
rs_meta->total_disk_size()) {
@@ -2768,7 +2768,7 @@ int64_t Tablet::get_segment_file_size(const
RowsetMetaSharedPtr& rs_meta) {
return total_segment_size;
}
-int64_t Tablet::get_inverted_index_file_szie(const RowsetMetaSharedPtr&
rs_meta) {
+int64_t Tablet::get_inverted_index_file_size(const RowsetMetaSharedPtr&
rs_meta) {
const auto& fs = rs_meta->fs();
if (!fs) {
LOG(WARNING) << "get fs failed, resource_id={}" <<
rs_meta->resource_id();
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index ca774a618fc..96bc5d87e3c 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -214,6 +214,7 @@ public:
std::mutex& get_push_lock() { return _ingest_lock; }
std::mutex& get_base_compaction_lock() { return _base_compaction_lock; }
std::mutex& get_cumulative_compaction_lock() { return
_cumulative_compaction_lock; }
+ std::shared_mutex& get_meta_store_lock() { return _meta_store_lock; }
std::shared_timed_mutex& get_migration_lock() { return _migration_lock; }
@@ -510,7 +511,7 @@ private:
void check_table_size_correctness();
std::string get_segment_path(const RowsetMetaSharedPtr& rs_meta, int64_t
seg_id);
int64_t get_segment_file_size(const RowsetMetaSharedPtr& rs_meta);
- int64_t get_inverted_index_file_szie(const RowsetMetaSharedPtr& rs_meta);
+ int64_t get_inverted_index_file_size(const RowsetMetaSharedPtr& rs_meta);
public:
static const int64_t K_INVALID_CUMULATIVE_POINT = -1;
@@ -567,7 +568,7 @@ private:
std::shared_ptr<CumulativeCompactionPolicy> _cumulative_compaction_policy;
std::string_view _cumulative_compaction_type;
- // use a seperate thread to check all tablets paths existance
+ // use a separate thread to check all tablets paths existence
std::atomic<bool> _is_tablet_path_exists;
int64_t _last_missed_version;
diff --git a/be/src/runtime/snapshot_loader.cpp
b/be/src/runtime/snapshot_loader.cpp
index d95bbb7f792..9007a4cc2b6 100644
--- a/be/src/runtime/snapshot_loader.cpp
+++ b/be/src/runtime/snapshot_loader.cpp
@@ -789,50 +789,69 @@ Status SnapshotLoader::move(const std::string&
snapshot_path, TabletSharedPtr ta
return Status::InternalError(err_msg);
}
- if (overwrite) {
- std::vector<std::string> snapshot_files;
- RETURN_IF_ERROR(_get_existing_files_from_local(snapshot_path,
&snapshot_files));
-
- // 1. simply delete the old dir and replace it with the snapshot dir
- try {
- // This remove seems soft enough, because we already get
- // tablet id and schema hash from this path, which
- // means this path is a valid path.
- std::filesystem::remove_all(tablet_path);
- VLOG_CRITICAL << "remove dir: " << tablet_path;
- std::filesystem::create_directory(tablet_path);
- VLOG_CRITICAL << "re-create dir: " << tablet_path;
- } catch (const std::filesystem::filesystem_error& e) {
- std::stringstream ss;
- ss << "failed to move tablet path: " << tablet_path << ". err: "
<< e.what();
- LOG(WARNING) << ss.str();
- return Status::InternalError(ss.str());
- }
+ if (!overwrite) {
+ LOG(FATAL) << "only support overwrite now";
+ __builtin_unreachable();
+ }
- // link files one by one
- // files in snapshot dir will be moved in snapshot clean process
- std::vector<std::string> linked_files;
- for (auto& file : snapshot_files) {
- auto full_src_path = fmt::format("{}/{}", snapshot_path, file);
- auto full_dest_path = fmt::format("{}/{}", tablet_path, file);
- if (link(full_src_path.c_str(), full_dest_path.c_str()) != 0) {
- LOG(WARNING) << "failed to link file from " << full_src_path
<< " to "
- << full_dest_path << ", err: " <<
std::strerror(errno);
-
- // clean the already linked files
- for (auto& linked_file : linked_files) {
- remove(linked_file.c_str());
- }
+ // Medium migration/clone/checkpoint/compaction may change or check the
+ // files and tablet meta, so we need to take these locks.
+ std::unique_lock migration_lock(tablet->get_migration_lock(),
std::try_to_lock);
+ std::unique_lock base_compact_lock(tablet->get_base_compaction_lock(),
std::try_to_lock);
+ std::unique_lock
cumu_compact_lock(tablet->get_cumulative_compaction_lock(), std::try_to_lock);
+ std::unique_lock cold_compact_lock(tablet->get_cold_compaction_lock(),
std::try_to_lock);
+ std::unique_lock build_idx_lock(tablet->get_build_inverted_index_lock(),
std::try_to_lock);
+ std::unique_lock meta_store_lock(tablet->get_meta_store_lock(),
std::try_to_lock);
+ if (!migration_lock.owns_lock() || !base_compact_lock.owns_lock() ||
+ !cumu_compact_lock.owns_lock() || !cold_compact_lock.owns_lock() ||
+ !build_idx_lock.owns_lock() || !meta_store_lock.owns_lock()) {
+ // This error should be retryable
+ auto status = Status::ObtainLockFailed("failed to get tablet locks,
tablet: {}", tablet_id);
+ LOG(WARNING) << status << ", snapshot path: " << snapshot_path
+ << ", tablet path: " << tablet_path;
+ return status;
+ }
- return Status::InternalError("move tablet failed");
+ std::vector<std::string> snapshot_files;
+ RETURN_IF_ERROR(_get_existing_files_from_local(snapshot_path,
&snapshot_files));
+
+ // FIXME: the below logic will demage the tablet files if failed in the
middle.
+
+ // 1. simply delete the old dir and replace it with the snapshot dir
+ try {
+ // This remove seems soft enough, because we already get
+ // tablet id and schema hash from this path, which
+ // means this path is a valid path.
+ std::filesystem::remove_all(tablet_path);
+ VLOG_CRITICAL << "remove dir: " << tablet_path;
+ std::filesystem::create_directory(tablet_path);
+ VLOG_CRITICAL << "re-create dir: " << tablet_path;
+ } catch (const std::filesystem::filesystem_error& e) {
+ std::stringstream ss;
+ ss << "failed to move tablet path: " << tablet_path << ". err: " <<
e.what();
+ LOG(WARNING) << ss.str();
+ return Status::InternalError(ss.str());
+ }
+
+ // link files one by one
+ // files in snapshot dir will be moved in snapshot clean process
+ std::vector<std::string> linked_files;
+ for (auto& file : snapshot_files) {
+ auto full_src_path = fmt::format("{}/{}", snapshot_path, file);
+ auto full_dest_path = fmt::format("{}/{}", tablet_path, file);
+ if (link(full_src_path.c_str(), full_dest_path.c_str()) != 0) {
+ LOG(WARNING) << "failed to link file from " << full_src_path << "
to " << full_dest_path
+ << ", err: " << std::strerror(errno);
+
+ // clean the already linked files
+ for (auto& linked_file : linked_files) {
+ remove(linked_file.c_str());
}
- linked_files.push_back(full_dest_path);
- VLOG_CRITICAL << "link file from " << full_src_path << " to " <<
full_dest_path;
- }
- } else {
- LOG(FATAL) << "only support overwrite now";
- __builtin_unreachable();
+ return Status::InternalError("move tablet failed");
+ }
+ linked_files.push_back(full_dest_path);
+ VLOG_CRITICAL << "link file from " << full_src_path << " to " <<
full_dest_path;
}
// snapshot loader not need to change tablet uid
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]