xy720 commented on code in PR #43642:
URL: https://github.com/apache/doris/pull/43642#discussion_r1865884659
##########
be/src/runtime/snapshot_loader.cpp:
##########
@@ -120,6 +123,127 @@ Status SnapshotLoader::init(TStorageBackendType::type
type, const std::string& l
SnapshotLoader::~SnapshotLoader() = default;
+static Status list_segment_inverted_index_file(io::RemoteFileSystem* cold_fs,
+ const std::string& dir, const
std::string& rowset,
+ std::vector<std::string>*
remote_files) {
+ bool exists = true;
+ std::vector<io::FileInfo> files;
+ RETURN_IF_ERROR(cold_fs->list(dir, true, &files, &exists));
+ for (auto& tmp_file : files) {
+ io::Path path(tmp_file.file_name);
+ std::string file_name = path.filename();
+
+ if (file_name.substr(0, rowset.length()).compare(rowset) != 0 ||
+ !_end_with(file_name, ".idx")) {
+ continue;
+ }
+ remote_files->push_back(file_name);
+ }
+
+ return Status::OK();
+}
+
+static Status download_and_upload_one_file(io::RemoteFileSystem& dest_fs,
+ io::RemoteFileSystem* cold_fs,
+ const std::string& remote_seg_path,
+ const std::string& local_seg_path,
+ const std::string& dest_seg_path) {
+ RETURN_IF_ERROR(cold_fs->download(remote_seg_path, local_seg_path));
+
+ // calc md5sum of localfile
+ std::string md5sum;
+ RETURN_IF_ERROR(io::global_local_filesystem()->md5sum(local_seg_path,
&md5sum));
+
+ RETURN_IF_ERROR(upload_with_checksum(dest_fs, local_seg_path,
dest_seg_path, md5sum));
+
+ //delete local file
+
RETURN_IF_ERROR(io::global_local_filesystem()->delete_file(local_seg_path));
+
+ return Status::OK();
+}
+
+static Status upload_remote_rowset(io::RemoteFileSystem& dest_fs, int64_t
tablet_id,
+ const std::string& local_path, const
std::string& dest_path,
+ io::RemoteFileSystem* cold_fs, const
std::string& rowset_id,
+ int segments, int have_inverted_index) {
+ Status res = Status::OK();
+
+ std::string remote_tablet_path = fmt::format("{}/{}", DATA_PREFIX,
tablet_id);
+
+ for (int i = 0; i < segments; i++) {
+ std::string remote_seg_path = fmt::format("{}/{}_{}.dat",
remote_tablet_path, rowset_id, i);
+ std::string local_seg_path = fmt::format("{}/{}_{}.dat", local_path,
rowset_id, i);
+ std::string dest_seg_path = fmt::format("{}/{}_{}.dat", dest_path,
rowset_id, i);
+
+ RETURN_IF_ERROR(download_and_upload_one_file(dest_fs, cold_fs,
remote_seg_path,
+ local_seg_path,
dest_seg_path));
+ }
+
+ if (!have_inverted_index) {
+ return res;
+ }
+
+ std::vector<std::string> remote_index_files;
+ RETURN_IF_ERROR(list_segment_inverted_index_file(cold_fs,
remote_tablet_path, rowset_id,
Review Comment:
Maybe we don't need to list the remote files here. We can get the index_id from
the rowset meta and the index_version from the tablet schema, and then use the
remote_idx_v1_path/remote_idx_v2_path functions in storage_policy.h to get the
remote path.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]