This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new cf1efce824 [fix](inverted index) use index id instead of column uid to
determine whether a hard link is required when build index (#21574)
cf1efce824 is described below
commit cf1efce824cf6f6bd1cb716fd578ed0386e9a267
Author: YueW <[email protected]>
AuthorDate: Sun Jul 9 16:45:27 2023 +0800
[fix](inverted index) use index id instead of column uid to determine
whether a hard link is required when build index (#21574)
Fix problem:
For the same column, there are concurrent drop index request and build
index request, if build index obtain lock before drop index, build a new index
file, but when drop index request execute, link file not contains all index
files for the column, that lead to new index file is missed.
Based on the above questions, use index id instead of column unique id to
determine whether a hard link is required when do build index
---
be/src/common/status.h | 2 ++
be/src/olap/rowset/beta_rowset.cpp | 64 ++++++++++++++++++--------------------
be/src/olap/rowset/beta_rowset.h | 2 +-
be/src/olap/rowset/rowset.h | 2 +-
be/src/olap/task/index_builder.cpp | 42 ++++++++-----------------
be/src/olap/task/index_builder.h | 3 --
be/test/testutil/mock_rowset.h | 2 +-
7 files changed, 48 insertions(+), 69 deletions(-)
diff --git a/be/src/common/status.h b/be/src/common/status.h
index 538b7de24f..45f9286933 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -266,6 +266,7 @@ E(INVERTED_INDEX_BYPASS, -6004);
E(INVERTED_INDEX_NO_TERMS, -6005);
E(INVERTED_INDEX_RENAME_FILE_FAILED, -6006);
E(INVERTED_INDEX_EVALUATE_SKIPPED, -6007);
+E(INVERTED_INDEX_BUILD_WAITTING, -6008);
#undef E
} // namespace ErrorCode
@@ -296,6 +297,7 @@ constexpr bool capture_stacktrace() {
&& code != ErrorCode::INVERTED_INDEX_BYPASS
&& code != ErrorCode::INVERTED_INDEX_NO_TERMS
&& code != ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED
+ && code != ErrorCode::INVERTED_INDEX_BUILD_WAITTING
&& code != ErrorCode::META_KEY_NOT_FOUND
&& code != ErrorCode::PUSH_VERSION_ALREADY_EXIST
&& code != ErrorCode::TABLE_ALREADY_DELETED_ERROR
diff --git a/be/src/olap/rowset/beta_rowset.cpp
b/be/src/olap/rowset/beta_rowset.cpp
index dd3073b0cd..d69621cf92 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -221,7 +221,7 @@ void BetaRowset::do_close() {
Status BetaRowset::link_files_to(const std::string& dir, RowsetId
new_rowset_id,
size_t new_rowset_start_seg_id,
- std::set<int32_t>* without_index_column_uids)
{
+ std::set<int32_t>* without_index_uids) {
DCHECK(is_local());
auto fs = _rowset_meta->fs();
if (!fs) {
@@ -246,45 +246,41 @@ Status BetaRowset::link_files_to(const std::string& dir,
RowsetId new_rowset_id,
<< "to=" << dst_path << ", errno=" << Errno::no();
return Status::Error<OS_ERROR>();
}
- for (auto& column : _schema->columns()) {
- if (without_index_column_uids != nullptr &&
- without_index_column_uids->count(column.unique_id())) {
+ for (auto& index : _schema->indexes()) {
+ if (index.index_type() != IndexType::INVERTED) {
continue;
}
- const TabletIndex* index_meta =
_schema->get_inverted_index(column.unique_id());
- if (index_meta) {
- std::string inverted_index_src_file_path =
- InvertedIndexDescriptor::get_index_file_name(src_path,
-
index_meta->index_id());
- std::string inverted_index_dst_file_path =
- InvertedIndexDescriptor::get_index_file_name(dst_path,
-
index_meta->index_id());
- bool need_to_link = true;
- if (_schema->skip_write_index_on_load()) {
- local_fs->exists(inverted_index_src_file_path,
&need_to_link);
- if (!need_to_link) {
- LOG(INFO) << "skip create hard link to not existed
file="
- << inverted_index_src_file_path;
- }
- }
+ auto index_id = index.index_id();
+ if (without_index_uids != nullptr &&
without_index_uids->count(index_id)) {
+ continue;
+ }
- if (need_to_link) {
- if (!local_fs->link_file(inverted_index_src_file_path,
- inverted_index_dst_file_path)
- .ok()) {
- LOG(WARNING)
- << "fail to create hard link. from=" <<
inverted_index_src_file_path
- << ", "
- << "to=" << inverted_index_dst_file_path
- << ", errno=" << Errno::no();
- return Status::Error<OS_ERROR>();
- }
- LOG(INFO) << "success to create hard link. from="
- << inverted_index_src_file_path << ", "
- << "to=" << inverted_index_dst_file_path;
+ std::string inverted_index_src_file_path =
+ InvertedIndexDescriptor::get_index_file_name(src_path,
index_id);
+ std::string inverted_index_dst_file_path =
+ InvertedIndexDescriptor::get_index_file_name(dst_path,
index_id);
+ bool need_to_link = true;
+ if (_schema->skip_write_index_on_load()) {
+ local_fs->exists(inverted_index_src_file_path, &need_to_link);
+ if (!need_to_link) {
+ LOG(INFO) << "skip create hard link to not existed file="
+ << inverted_index_src_file_path;
}
}
+ if (need_to_link) {
+ if (!local_fs->link_file(inverted_index_src_file_path,
inverted_index_dst_file_path)
+ .ok()) {
+ LOG(WARNING) << "fail to create hard link. from="
+ << inverted_index_src_file_path << ", "
+ << "to=" << inverted_index_dst_file_path
+ << ", errno=" << Errno::no();
+ return Status::Error<OS_ERROR>();
+ }
+ LOG(INFO) << "success to create hard link. from=" <<
inverted_index_src_file_path
+ << ", "
+ << "to=" << inverted_index_dst_file_path;
+ }
}
}
return Status::OK();
diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h
index 643782c461..064b8fcb6e 100644
--- a/be/src/olap/rowset/beta_rowset.h
+++ b/be/src/olap/rowset/beta_rowset.h
@@ -72,7 +72,7 @@ public:
Status link_files_to(const std::string& dir, RowsetId new_rowset_id,
size_t new_rowset_start_seg_id = 0,
- std::set<int32_t>* without_index_column_uids =
nullptr) override;
+ std::set<int32_t>* without_index_uids = nullptr)
override;
Status copy_files_to(const std::string& dir, const RowsetId&
new_rowset_id) override;
diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h
index 8829cc0770..d3a0c10447 100644
--- a/be/src/olap/rowset/rowset.h
+++ b/be/src/olap/rowset/rowset.h
@@ -203,7 +203,7 @@ public:
// hard link all files in this rowset to `dir` to form a new rowset with
id `new_rowset_id`.
virtual Status link_files_to(const std::string& dir, RowsetId
new_rowset_id,
size_t new_rowset_start_seg_id = 0,
- std::set<int32_t>* without_index_column_uids
= nullptr) = 0;
+ std::set<int32_t>* without_index_uids =
nullptr) = 0;
// copy all files to `dir`
virtual Status copy_files_to(const std::string& dir, const RowsetId&
new_rowset_id) = 0;
diff --git a/be/src/olap/task/index_builder.cpp
b/be/src/olap/task/index_builder.cpp
index 1022daa113..1dc1d5294a 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -74,6 +74,18 @@ Status IndexBuilder::update_inverted_index_info() {
for (auto t_inverted_index : _alter_inverted_indexes) {
TabletIndex index;
index.init_from_thrift(t_inverted_index,
*input_rs_tablet_schema);
+ auto column_uid = index.col_unique_ids()[0];
+ const TabletIndex* exist_index =
+
output_rs_tablet_schema->get_inverted_index(column_uid);
+ if (exist_index && exist_index->index_id() !=
index.index_id()) {
+ // maybe there are concurrent drop index request did not
obtain the lock,
+ // so return error, to wait the drop index request
finished.
+ LOG(WARNING) << "column: " << column_uid << " has a exist
inverted index"
+ << ", but the index id not equal request's
index id, "
+ << ", exist index id: " <<
exist_index->index_id()
+ << ", request's index id: " <<
index.index_id();
+ return
Status::Error<ErrorCode::INVERTED_INDEX_BUILD_WAITTING>();
+ }
output_rs_tablet_schema->append_index(std::move(index));
}
}
@@ -94,11 +106,9 @@ Status IndexBuilder::update_inverted_index_info() {
if (!status.ok()) {
return Status::Error<ErrorCode::ROWSET_BUILDER_INIT>();
}
- std::set<int32_t> alter_column_ids_set =
-
_rowset_alter_index_column_ids[input_rowset->rowset_id().to_string()];
RETURN_IF_ERROR(input_rowset->link_files_to(_tablet->tablet_path(),
output_rs_writer->rowset_id(), 0,
- &alter_column_ids_set));
// build output rowset
+ &_alter_index_ids)); //
build output rowset
auto input_rowset_meta = input_rowset->rowset_meta();
RowsetMetaSharedPtr rowset_meta = std::make_shared<RowsetMeta>();
@@ -402,8 +412,6 @@ Status IndexBuilder::do_build_inverted_index() {
return Status::OK();
}
- _calc_alter_column_ids();
-
auto st = update_inverted_index_info();
if (!st.ok()) {
LOG(WARNING) << "failed to update_inverted_index_info. "
@@ -473,28 +481,4 @@ void IndexBuilder::gc_output_rowset() {
}
}
-Status IndexBuilder::_calc_alter_column_ids() {
- for (auto& rs : _input_rowsets) {
- RowsetId rowset_id = rs->rowset_id();
- auto rs_tablet_schema = rs->tablet_schema();
- std::set<int32_t> alter_column_uids;
- for (auto inverted_index : _alter_inverted_indexes) {
- auto column_name = inverted_index.columns[0];
- auto column_idx = rs_tablet_schema->field_index(column_name);
- if (column_idx < 0) {
- LOG(WARNING) << "referenced column was missing. "
- << "[column=" << column_name << "
referenced_column=" << column_idx
- << "]";
- continue;
- }
- auto column = rs_tablet_schema->column(column_idx);
- alter_column_uids.insert(column.unique_id());
- }
- _rowset_alter_index_column_ids.insert(
- std::make_pair(rowset_id.to_string(), alter_column_uids));
- }
-
- return Status::OK();
-}
-
} // namespace doris
diff --git a/be/src/olap/task/index_builder.h b/be/src/olap/task/index_builder.h
index 9e406c22c1..2ff266f5a7 100644
--- a/be/src/olap/task/index_builder.h
+++ b/be/src/olap/task/index_builder.h
@@ -59,14 +59,11 @@ private:
const std::pair<int64_t, int64_t>& index_writer_sign,
Field* field,
const uint8_t* null_map, const uint8_t** ptr, size_t
num_rows);
- Status _calc_alter_column_ids();
-
private:
TabletSharedPtr _tablet;
std::vector<TColumn> _columns;
std::vector<doris::TOlapTableIndex> _alter_inverted_indexes;
bool _is_drop_op;
- std::unordered_map<std::string, std::set<int32_t>>
_rowset_alter_index_column_ids;
std::set<int32_t> _alter_index_ids;
std::vector<RowsetSharedPtr> _input_rowsets;
std::vector<RowsetSharedPtr> _output_rowsets;
diff --git a/be/test/testutil/mock_rowset.h b/be/test/testutil/mock_rowset.h
index a16a37e3cd..93b76d9c67 100644
--- a/be/test/testutil/mock_rowset.h
+++ b/be/test/testutil/mock_rowset.h
@@ -30,7 +30,7 @@ class MockRowset : public Rowset {
Status remove() override { return Status::NotSupported("MockRowset not
support this method."); }
Status link_files_to(const std::string& dir, RowsetId new_rowset_id,
size_t start_seg_id,
- std::set<int32_t>* without_index_column_uids)
override {
+ std::set<int32_t>* without_index_uids) override {
return Status::NotSupported("MockRowset not support this method.");
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]