This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 24b51b9035 [fix](compaction) segcompaction coredump if the rowset
starts with a big segment (#14174) (#14176)
24b51b9035 is described below
commit 24b51b90357943801aa5654c1a6fd3ccd796e4c5
Author: zhengyu <[email protected]>
AuthorDate: Mon Nov 14 09:54:08 2022 +0800
[fix](compaction) segcompaction coredump if the rowset starts with a big
segment (#14174) (#14176)
Signed-off-by: freemandealer <[email protected]>
Signed-off-by: freemandealer <[email protected]>
---
be/src/olap/rowset/beta_rowset_writer.cpp | 1 +
be/test/olap/segcompaction_test.cpp | 125 ++++++++++++++++++++++++++++++
2 files changed, 126 insertions(+)
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp
b/be/src/olap/rowset/beta_rowset_writer.cpp
index 78b85b9df3..65b78b288a 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -206,6 +206,7 @@ Status BetaRowsetWriter::_rename_compacted_segments(int64_t
begin, int64_t end)
Status BetaRowsetWriter::_rename_compacted_segment_plain(uint64_t seg_id) {
if (seg_id == _num_segcompacted) {
+ ++_num_segcompacted;
return Status::OK();
}
diff --git a/be/test/olap/segcompaction_test.cpp
b/be/test/olap/segcompaction_test.cpp
index ca2dc28f04..205fa0cbbd 100644
--- a/be/test/olap/segcompaction_test.cpp
+++ b/be/test/olap/segcompaction_test.cpp
@@ -91,6 +91,7 @@ public:
delete l_engine;
l_engine = nullptr;
}
+ config::enable_segcompaction = false;
}
protected:
@@ -167,6 +168,7 @@ protected:
tablet_schema->init_from_pb(tablet_schema_pb);
}
+ // use different id to avoid conflict
void create_rowset_writer_context(int64_t id, TabletSchemaSPtr
tablet_schema,
RowsetWriterContext*
rowset_writer_context) {
RowsetId rowset_id;
@@ -449,6 +451,129 @@ TEST_F(SegCompactionTest,
SegCompactionInterleaveWithBig_ooooOOoOooooooooO) {
}
}
+TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { // rowset whose first segment is a big one (case from #14174)
+ config::enable_segcompaction = true;
+ config::enable_storage_vectorization = true;
+ Status s;
+ TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+ create_tablet_schema(tablet_schema);
+
+ RowsetSharedPtr rowset;
+ config::segcompaction_small_threshold = 6000; // 4096 < 6000 < 6400: sits between the two batch sizes written below
+ config::segcompaction_threshold_segment_num = 5; // NOTE(review): exactly five batches are flushed below - presumably chosen to reach this count; confirm
+ std::vector<uint32_t> segment_num_rows;
+ { // write five batches of 6400, 4096, 6400, 4096 and 6400 rows, calling flush() after each batch
+ RowsetWriterContext writer_context;
+ create_rowset_writer_context(10049, tablet_schema, &writer_context);
+
+ std::unique_ptr<RowsetWriter> rowset_writer;
+ s = RowsetFactory::create_rowset_writer(writer_context,
&rowset_writer);
+ EXPECT_EQ(Status::OK(), s);
+
+ RowCursor input_row;
+ input_row.init(tablet_schema);
+
+ // for loop index "i" (always 0 here, because num_segments is 1 in every batch), row "rid"
+ // k1 := rid * 100 + i
+ // k2 := i
+ // k3 := rid
+ int num_segments = 1;
+ uint32_t rows_per_segment = 6400; // batch 0: above the 6000 threshold (O)
+ for (int i = 0; i < num_segments; ++i) {
+ MemPool mem_pool;
+ for (int rid = 0; rid < rows_per_segment; ++rid) {
+ uint32_t k1 = rid * 100 + i;
+ uint32_t k2 = i;
+ uint32_t k3 = rid;
+ input_row.set_field_content(0, reinterpret_cast<char*>(&k1),
&mem_pool);
+ input_row.set_field_content(1, reinterpret_cast<char*>(&k2),
&mem_pool);
+ input_row.set_field_content(2, reinterpret_cast<char*>(&k3),
&mem_pool);
+ s = rowset_writer->add_row(input_row);
+ EXPECT_EQ(Status::OK(), s);
+ }
+ s = rowset_writer->flush();
+ EXPECT_EQ(Status::OK(), s);
+ }
+ num_segments = 1;
+ rows_per_segment = 4096; // batch 1: below the 6000 threshold (o)
+ for (int i = 0; i < num_segments; ++i) {
+ MemPool mem_pool;
+ for (int rid = 0; rid < rows_per_segment; ++rid) {
+ uint32_t k1 = rid * 100 + i;
+ uint32_t k2 = i;
+ uint32_t k3 = rid;
+ input_row.set_field_content(0, reinterpret_cast<char*>(&k1),
&mem_pool);
+ input_row.set_field_content(1, reinterpret_cast<char*>(&k2),
&mem_pool);
+ input_row.set_field_content(2, reinterpret_cast<char*>(&k3),
&mem_pool);
+ s = rowset_writer->add_row(input_row);
+ EXPECT_EQ(Status::OK(), s);
+ }
+ s = rowset_writer->flush();
+ EXPECT_EQ(Status::OK(), s);
+ }
+ num_segments = 1;
+ rows_per_segment = 6400; // batch 2: above the 6000 threshold (O)
+ for (int i = 0; i < num_segments; ++i) {
+ MemPool mem_pool;
+ for (int rid = 0; rid < rows_per_segment; ++rid) {
+ uint32_t k1 = rid * 100 + i;
+ uint32_t k2 = i;
+ uint32_t k3 = rid;
+ input_row.set_field_content(0, reinterpret_cast<char*>(&k1),
&mem_pool);
+ input_row.set_field_content(1, reinterpret_cast<char*>(&k2),
&mem_pool);
+ input_row.set_field_content(2, reinterpret_cast<char*>(&k3),
&mem_pool);
+ s = rowset_writer->add_row(input_row);
+ EXPECT_EQ(Status::OK(), s);
+ }
+ s = rowset_writer->flush();
+ EXPECT_EQ(Status::OK(), s);
+ }
+ num_segments = 1;
+ rows_per_segment = 4096; // batch 3: below the 6000 threshold (o)
+ for (int i = 0; i < num_segments; ++i) {
+ MemPool mem_pool;
+ for (int rid = 0; rid < rows_per_segment; ++rid) {
+ uint32_t k1 = rid * 100 + i;
+ uint32_t k2 = i;
+ uint32_t k3 = rid;
+ input_row.set_field_content(0, reinterpret_cast<char*>(&k1),
&mem_pool);
+ input_row.set_field_content(1, reinterpret_cast<char*>(&k2),
&mem_pool);
+ input_row.set_field_content(2, reinterpret_cast<char*>(&k3),
&mem_pool);
+ s = rowset_writer->add_row(input_row);
+ EXPECT_EQ(Status::OK(), s);
+ }
+ s = rowset_writer->flush();
+ EXPECT_EQ(Status::OK(), s);
+ }
+ num_segments = 1;
+ rows_per_segment = 6400; // batch 4: above the 6000 threshold (O)
+ for (int i = 0; i < num_segments; ++i) {
+ MemPool mem_pool;
+ for (int rid = 0; rid < rows_per_segment; ++rid) {
+ uint32_t k1 = rid * 100 + i;
+ uint32_t k2 = i;
+ uint32_t k3 = rid;
+ input_row.set_field_content(0, reinterpret_cast<char*>(&k1),
&mem_pool);
+ input_row.set_field_content(1, reinterpret_cast<char*>(&k2),
&mem_pool);
+ input_row.set_field_content(2, reinterpret_cast<char*>(&k3),
&mem_pool);
+ s = rowset_writer->add_row(input_row);
+ EXPECT_EQ(Status::OK(), s);
+ }
+ s = rowset_writer->flush();
+ EXPECT_EQ(Status::OK(), s);
+ }
+
+ rowset = rowset_writer->build();
+ std::vector<std::string> ls;
+ ls.push_back("10049_0.dat"); // O: batch 0, 6400 rows
+ ls.push_back("10049_1.dat"); // o: batch 1, 4096 rows
+ ls.push_back("10049_2.dat"); // O: batch 2, 6400 rows
+ ls.push_back("10049_3.dat"); // o: batch 3, 4096 rows
+ ls.push_back("10049_4.dat"); // O: batch 4, 6400 rows
+ EXPECT_TRUE(check_dir(ls)); // NOTE(review): presumably checks that these files are what the rowset dir holds - confirm against check_dir
+ }
+}
+
} // namespace doris
// @brief Test Stub
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]