This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-1.1-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.1-lts by this push:
new 3a7b5dc4bf [enhancement](branch1.1) add missed_version log information
#14285
3a7b5dc4bf is described below
commit 3a7b5dc4bf71934bfe219be2752192b832665b70
Author: AlexYue <[email protected]>
AuthorDate: Tue Nov 15 17:18:02 2022 +0800
[enhancement](branch1.1) add missed_version log information #14285
Previously, the _make_and_download_snapshots function did not log
the missed_version information. This PR adds it to the log output so that
the versions are easier to trace during a clone task.
---
be/src/olap/olap_common.h | 21 ++++++++----
be/src/olap/task/engine_clone_task.cpp | 61 +++++++++++++++++++---------------
be/src/olap/task/engine_clone_task.h | 4 +--
3 files changed, 50 insertions(+), 36 deletions(-)
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index 6c7c0e598d..0bd462e896 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -168,10 +168,10 @@ enum FieldAggregationMethod {
enum OLAPCompressionType {
// Compression algorithm used for network transmission, low compression
rate, low cpu overhead
OLAP_COMP_TRANSPORT = 1,
- // Compression algorithm used for hard disk data, with high compression
rate and high CPU overhead
- OLAP_COMP_STORAGE = 2,
- // The compression algorithm used for storage, the compression rate is
low, and the cpu overhead is low
- OLAP_COMP_LZ4 = 3,
+ // Compression algorithm used for hard disk data, with high compression
rate and high CPU overhead
+ OLAP_COMP_STORAGE = 2,
+ // The compression algorithm used for storage, the compression rate is
low, and the cpu overhead is low
+ OLAP_COMP_LZ4 = 3,
};
enum PushType {
@@ -216,6 +216,13 @@ inline std::ostream& operator<<(std::ostream& os, const
Version& version) {
return os << "[" << version.first << "-" << version.second << "]";
}
+inline std::ostream& operator<<(std::ostream& os, const Versions& versions) {
+ for (auto& version : versions) {
+ os << version;
+ }
+ return os;
+}
+
// used for hash-struct of hash_map<Version, Rowset*>.
struct HashOfVersion {
size_t operator()(const Version& version) const {
@@ -290,12 +297,12 @@ struct OlapReaderStatistics {
// general_debug_ns is designed for the purpose of DEBUG, to record any
infomations of debugging or profiling.
// different from specific meaningful timer such as index_load_ns,
general_debug_ns can be used flexibly.
// general_debug_ns has associated with OlapScanNode's
_general_debug_timer already.
- // so general_debug_ns' values will update to _general_debug_timer
automaticly,
+ // so general_debug_ns' values will update to _general_debug_timer
automaticly,
// the timer result can be checked through QueryProfile web page easily.
- // when search general_debug_ns, you can find that general_debug_ns has
not been used,
+ // when search general_debug_ns, you can find that general_debug_ns has
not been used,
// this is because such codes added for debug purpose should not commit,
it's just for debuging.
// so, please do not delete general_debug_ns defined here
- // usage example:
+ // usage example:
// SCOPED_RAW_TIMER(&_stats->general_debug_ns[1]);
int64_t general_debug_ns[GENERAL_DEBUG_COUNT] = {};
};
diff --git a/be/src/olap/task/engine_clone_task.cpp
b/be/src/olap/task/engine_clone_task.cpp
index c1d42ce855..30319ee6c2 100644
--- a/be/src/olap/task/engine_clone_task.cpp
+++ b/be/src/olap/task/engine_clone_task.cpp
@@ -73,6 +73,9 @@ OLAPStatus EngineCloneTask::_do_clone() {
TabletSharedPtr tablet =
StorageEngine::instance()->tablet_manager()->get_tablet(
_clone_req.tablet_id, _clone_req.schema_hash);
bool is_new_tablet = tablet == nullptr;
+
+ // try to incremental clone
+ std::vector<Version> missed_versions;
// try to repair a tablet with missing version
if (tablet != nullptr) {
ReadLock migration_rlock(tablet->get_migration_lock(),
std::try_to_lock);
@@ -84,8 +87,6 @@ OLAPStatus EngineCloneTask::_do_clone() {
string local_data_path = tablet->tablet_path_desc().filepath +
CLONE_PREFIX;
bool allow_incremental_clone = false;
- // try to incremental clone
- std::vector<Version> missed_versions;
tablet->calc_missed_versions(_clone_req.committed_version,
&missed_versions);
// if missed version size is 0, then it is useless to clone from
remote be, it means local data is
@@ -101,7 +102,7 @@ OLAPStatus EngineCloneTask::_do_clone() {
// if tablet on src backend does not contains missing version, it will
download all versions,
// and set allow_incremental_clone to false
status = _make_and_download_snapshots(*(tablet->data_dir()),
local_data_path, &src_host,
- &src_file_path, _error_msgs,
&missed_versions,
+ &src_file_path, _error_msgs,
missed_versions,
&allow_incremental_clone);
LOG(INFO) << "tablet exist with number of missing version: " <<
missed_versions.size()
@@ -146,7 +147,7 @@ OLAPStatus EngineCloneTask::_do_clone() {
if (status.ok()) {
bool allow_incremental_clone = false;
status = _make_and_download_snapshots(*store,
tablet_dir_stream.str(), &src_host,
- &src_file_path, _error_msgs,
nullptr,
+ &src_file_path, _error_msgs,
missed_versions,
&allow_incremental_clone);
}
@@ -262,10 +263,12 @@ void EngineCloneTask::_set_tablet_info(Status status,
bool is_new_tablet) {
/// 2. Download all snapshots to CLONE dir.
/// 3. Convert rowset ids of downloaded snapshots.
/// 4. Release the snapshots on source BE.
-Status EngineCloneTask::_make_and_download_snapshots(
- DataDir& data_dir, const string& local_data_path, TBackend* src_host,
string* snapshot_path,
- std::vector<string>* error_msgs, const std::vector<Version>*
missed_versions,
- bool* allow_incremental_clone) {
+Status EngineCloneTask::_make_and_download_snapshots(DataDir& data_dir,
+ const string&
local_data_path,
+ TBackend* src_host,
string* snapshot_path,
+ std::vector<string>*
error_msgs,
+ const
std::vector<Version>& missed_versions,
+ bool*
allow_incremental_clone) {
Status status = Status::OK();
std::string local_path = local_data_path + "/";
@@ -288,13 +291,15 @@ Status EngineCloneTask::_make_and_download_snapshots(
LOG(INFO) << "success to make snapshot. ip=" << src.host << ",
port=" << src.be_port
<< ", tablet=" << _clone_req.tablet_id
<< ", schema_hash=" << _clone_req.schema_hash
- << ", snapshot_path=" << *snapshot_path << ",
signature=" << _signature;
+ << ", snapshot_path=" << *snapshot_path << ",
signature=" << _signature
+ << ", missed_version=" << missed_versions;
status = Status::OK();
} else {
LOG(WARNING) << "fail to make snapshot, ip=" << src.host << ",
port=" << src.be_port
<< ", tablet=" << _clone_req.tablet_id
<< ", schema_hash=" << _clone_req.schema_hash
- << ", signature=" << _signature << ", error=" <<
st.to_string();
+ << ", signature=" << _signature << ",
missed_version=" << missed_versions
+ << ", error=" << st.to_string();
error_msgs->push_back("make snapshot failed. backend_ip: " +
src_host->host);
status = Status::InternalError("Failed to make snapshot");
@@ -352,20 +357,18 @@ Status EngineCloneTask::_make_and_download_snapshots(
Status EngineCloneTask::_make_snapshot(const std::string& ip, int port,
TTableId tablet_id,
TSchemaHash schema_hash, int timeout_s,
- const std::vector<Version>*
missed_versions,
+ const std::vector<Version>&
missed_versions,
std::string* snapshot_path, bool*
allow_incremental_clone,
int32_t* snapshot_version) {
TSnapshotRequest request;
request.__set_tablet_id(tablet_id);
request.__set_schema_hash(schema_hash);
request.__set_preferred_snapshot_version(g_Types_constants.TPREFER_SNAPSHOT_REQ_VERSION);
- if (missed_versions != nullptr) {
- // TODO: missing version composed of singleton delta.
- // if not, this place should be rewrote.
- request.__isset.missing_version = true;
- for (auto& version : *missed_versions) {
- request.missing_version.push_back(version.first);
- }
+ // TODO: missing version composed of singleton delta.
+ // if not, this place should be rewrote.
+ request.__isset.missing_version = !(missed_versions.empty());
+ for (auto& version : missed_versions) {
+ request.missing_version.push_back(version.first);
}
if (timeout_s > 0) {
request.__set_timeout(timeout_s);
@@ -541,7 +544,8 @@ OLAPStatus EngineCloneTask::_finish_clone(Tablet* tablet,
const string& clone_di
string cloned_tablet_meta_file =
clone_dir + "/" + std::to_string(tablet->tablet_id()) +
".hdr";
TabletMeta cloned_tablet_meta;
- if ((res =
cloned_tablet_meta.create_from_file(cloned_tablet_meta_file)) != OLAP_SUCCESS) {
+ if ((res =
cloned_tablet_meta.create_from_file(cloned_tablet_meta_file)) !=
+ OLAP_SUCCESS) {
LOG(WARNING) << "fail to load src header when clone. "
<< ", cloned_tablet_meta_file=" <<
cloned_tablet_meta_file;
break;
@@ -551,9 +555,11 @@ OLAPStatus EngineCloneTask::_finish_clone(Tablet* tablet,
const string& clone_di
// check all files in /clone and /tablet
set<string> clone_files;
- Status ret = FileUtils::list_dirs_files(clone_dir, nullptr,
&clone_files, Env::Default());
+ Status ret =
+ FileUtils::list_dirs_files(clone_dir, nullptr,
&clone_files, Env::Default());
if (!ret.ok()) {
- LOG(WARNING) << "failed to list clone dir when clone.
[clone_dir=" << clone_dir << "]"
+ LOG(WARNING) << "failed to list clone dir when clone.
[clone_dir=" << clone_dir
+ << "]"
<< " error: " << ret.to_string();
res = OLAP_ERR_DISK_FAILURE;
break;
@@ -563,8 +569,8 @@ OLAPStatus EngineCloneTask::_finish_clone(Tablet* tablet,
const string& clone_di
string tablet_dir = tablet->tablet_path_desc().filepath;
ret = FileUtils::list_dirs_files(tablet_dir, nullptr,
&local_files, Env::Default());
if (!ret.ok()) {
- LOG(WARNING) << "failed to list local tablet dir when clone.
[tablet_dir=" << tablet_dir
- << "]"
+ LOG(WARNING) << "failed to list local tablet dir when clone.
[tablet_dir="
+ << tablet_dir << "]"
<< " error: " << ret.to_string();
res = OLAP_ERR_DISK_FAILURE;
break;
@@ -576,7 +582,8 @@ OLAPStatus EngineCloneTask::_finish_clone(Tablet* tablet,
const string& clone_di
for (const string& clone_file : clone_files) {
if (local_files.find(clone_file) != local_files.end()) {
VLOG_NOTICE << "find same file when clone, skip it. "
- << "tablet=" << tablet->full_name() << ",
clone_file=" << clone_file;
+ << "tablet=" << tablet->full_name()
+ << ", clone_file=" << clone_file;
continue;
}
@@ -756,9 +763,9 @@ OLAPStatus EngineCloneTask::_finish_full_clone(Tablet*
tablet, TabletMeta* clone
// but some rowset is useless, so that remove them here
for (auto& rs_meta_ptr : rs_metas_found_in_src) {
RowsetSharedPtr rowset_to_remove;
- auto s =
-
RowsetFactory::create_rowset(&(cloned_tablet_meta->tablet_schema()),
-
tablet->tablet_path_desc().filepath, rs_meta_ptr, &rowset_to_remove);
+ auto s =
RowsetFactory::create_rowset(&(cloned_tablet_meta->tablet_schema()),
+
tablet->tablet_path_desc().filepath, rs_meta_ptr,
+ &rowset_to_remove);
if (s != OLAP_SUCCESS) {
LOG(WARNING) << "failed to init rowset to remove: "
<< rs_meta_ptr->rowset_id().to_string();
diff --git a/be/src/olap/task/engine_clone_task.h
b/be/src/olap/task/engine_clone_task.h
index 3cb883ae31..ccd4df4298 100644
--- a/be/src/olap/task/engine_clone_task.h
+++ b/be/src/olap/task/engine_clone_task.h
@@ -53,7 +53,7 @@ private:
Status _make_and_download_snapshots(DataDir& data_dir, const string&
local_data_path, TBackend* src_host,
string* src_file_path, vector<string>* error_msgs,
- const vector<Version>* missing_versions, bool*
allow_incremental_clone);
+ const vector<Version>& missing_versions, bool*
allow_incremental_clone);
void _set_tablet_info(Status status, bool is_new_tablet);
@@ -63,7 +63,7 @@ private:
Status _make_snapshot(const std::string& ip, int port, TTableId tablet_id,
TSchemaHash schema_hash, int timeout_s,
- const std::vector<Version>* missed_versions,
std::string* snapshot_path,
+ const std::vector<Version>& missed_versions,
std::string* snapshot_path,
bool* allow_incremental_clone, int32_t*
snapshot_version);
Status _release_snapshot(const std::string& ip, int port, const
std::string& snapshot_path);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]