This is an automated email from the ASF dual-hosted git repository.
zhangchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9519d7ede98 [enhancement](be-ut)Add more indexed column reader be unit
test (#25652)
9519d7ede98 is described below
commit 9519d7ede989fbd219c6b473d0a798fabd57b677
Author: abmdocrt <[email protected]>
AuthorDate: Mon Oct 23 10:12:53 2023 +0800
[enhancement](be-ut)Add more indexed column reader be unit test (#25652)
Added more unit tests covering:
1. a key that exists or does not exist within a single page
2. a key that exists or does not exist across multiple pages
3. a key that falls between two pages.
---
be/src/olap/primary_key_index.h | 3 +
.../olap/rowset/segment_v2/indexed_column_writer.h | 3 +
be/test/olap/primary_key_index_test.cpp | 153 +++++++++++++++++++++
3 files changed, 159 insertions(+)
diff --git a/be/src/olap/primary_key_index.h b/be/src/olap/primary_key_index.h
index 233644b4e07..59b88c2f724 100644
--- a/be/src/olap/primary_key_index.h
+++ b/be/src/olap/primary_key_index.h
@@ -67,6 +67,9 @@ public:
uint64_t disk_size() const { return _disk_size; }
+ // Used by BE unit tests: forwards to the index builder's data_page_num().
+ uint32_t data_page_num() const { return
_primary_key_index_builder->data_page_num(); }
+
Slice min_key() { return Slice(_min_key.data(), _min_key.size() -
_seq_col_length); }
Slice max_key() { return Slice(_max_key.data(), _max_key.size() -
_seq_col_length); }
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.h
b/be/src/olap/rowset/segment_v2/indexed_column_writer.h
index ba61708dd90..ecb26782ad1 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_writer.h
+++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.h
@@ -22,6 +22,7 @@
#include <stdint.h>
#include <cstddef>
+#include <cstdint>
#include <memory>
#include "common/status.h"
@@ -85,6 +86,8 @@ public:
uint64_t disk_size() const { return _disk_size; }
+ uint32_t data_page_num() const { return _num_data_pages + 1; } // _num_data_pages plus one; presumably the extra one is the page still being filled -- TODO confirm
+
private:
Status _finish_current_data_page(size_t& num_val);
diff --git a/be/test/olap/primary_key_index_test.cpp
b/be/test/olap/primary_key_index_test.cpp
index 4de6be24feb..fb96e7411e6 100644
--- a/be/test/olap/primary_key_index_test.cpp
+++ b/be/test/olap/primary_key_index_test.cpp
@@ -167,4 +167,157 @@ TEST_F(PrimaryKeyIndexTest, builder) {
}
}
+TEST_F(PrimaryKeyIndexTest, multiple_pages) { // Builds an index with a tiny data page size, reads it back, and checks lookups of present and absent keys.
+ std::string filename = kTestDir + "/multiple_pages";
+ io::FileWriterPtr file_writer;
+ auto fs = io::global_local_filesystem();
+ EXPECT_TRUE(fs->create_file(filename, &file_writer).ok());
+
+ config::primary_key_data_page_size = 5 * 5; // small page size so the keys below need more than one page (asserted further down); NOTE(review): not restored within this test
+ PrimaryKeyIndexBuilder builder(file_writer.get(), 0);
+ static_cast<void>(builder.init()); // return value deliberately discarded
+ size_t num_rows = 0;
+ std::vector<std::string> keys {"00000", "00002", "00004", "00006", "00008",
+ "00010", "00012", "00014", "00016",
"00018"};
+ for (const std::string& key : keys) {
+ static_cast<void>(builder.add_item(key)); // return value deliberately discarded
+ num_rows++;
+ }
+ EXPECT_EQ("00000", builder.min_key().to_string());
+ EXPECT_EQ("00018", builder.max_key().to_string());
+ EXPECT_EQ(builder.size(), 2 * 5 * 5); // 50, which matches 10 keys x 5 characters
+ EXPECT_GT(builder.data_page_num(), 1); // the keys must have spilled into more than one data page
+ segment_v2::PrimaryKeyIndexMetaPB index_meta;
+ EXPECT_TRUE(builder.finalize(&index_meta)); // NOTE(review): checked without .ok(), unlike the other calls here -- confirm this is intended
+ EXPECT_EQ(builder.disk_size(), file_writer->bytes_appended());
+ EXPECT_TRUE(file_writer->close().ok());
+ EXPECT_EQ(num_rows, builder.num_rows());
+
+ PrimaryKeyIndexReader index_reader; // read side: reopen the file that was just written
+ io::FileReaderSPtr file_reader;
+ EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
+ EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok());
+ EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok());
+ EXPECT_EQ(num_rows, index_reader.num_rows());
+
+ std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
+ EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok());
+ bool exact_match = false;
+ uint32_t row_id;
+ for (size_t i = 0; i < keys.size(); i++) { // every inserted key: present, exact match, ordinal equals insertion index
+ bool exists = index_reader.check_present(keys[i]);
+ EXPECT_TRUE(exists);
+ auto status = index_iterator->seek_at_or_after(&keys[i], &exact_match);
+ EXPECT_TRUE(status.ok());
+ EXPECT_TRUE(exact_match);
+ row_id = index_iterator->get_current_ordinal();
+ EXPECT_EQ(i, row_id);
+ }
+ for (size_t i = 0; i < keys.size(); i++) { // seek by ordinal instead of by key for each row
+ bool exists = index_reader.check_present(keys[i]);
+ EXPECT_TRUE(exists);
+ auto status = index_iterator->seek_to_ordinal(i);
+ EXPECT_TRUE(status.ok());
+ row_id = index_iterator->get_current_ordinal();
+ EXPECT_EQ(i, row_id);
+ }
+ {
+ auto status = index_iterator->seek_to_ordinal(10); // 10 == keys.size(), one past the last row; the test expects OK
+ EXPECT_TRUE(status.ok());
+ row_id = index_iterator->get_current_ordinal();
+ EXPECT_EQ(10, row_id);
+ }
+
+ std::vector<std::string> non_exist_keys {"00001", "00003", "00005",
"00007", "00009",
+ "00011", "00013", "00015",
"00017"};
+ for (size_t i = 0; i < non_exist_keys.size(); i++) { // odd keys sit between inserted keys: absent, and the seek lands on the next larger key
+ Slice slice(non_exist_keys[i]);
+ bool exists = index_reader.check_present(slice);
+ EXPECT_FALSE(exists);
+ auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+ EXPECT_TRUE(status.ok());
+ EXPECT_FALSE(exact_match);
+ row_id = index_iterator->get_current_ordinal();
+ EXPECT_EQ(i + 1, row_id); // non_exist_keys[i] sorts just before keys[i + 1]
+ }
+ {
+ string key("00019"); // sorts after the max key "00018"
+ Slice slice(key);
+ bool exists = index_reader.check_present(slice);
+ EXPECT_FALSE(exists);
+ auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+ EXPECT_FALSE(exact_match);
+ EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>()); // seeking past the last key is expected to report ENTRY_NOT_FOUND
+ }
+}
+
+TEST_F(PrimaryKeyIndexTest, single_page) { // Same key set as multiple_pages, but with a page size large enough that one data page is expected.
+ std::string filename = kTestDir + "/single_page";
+ io::FileWriterPtr file_writer;
+ auto fs = io::global_local_filesystem();
+ EXPECT_TRUE(fs->create_file(filename, &file_writer).ok());
+ config::primary_key_data_page_size = 32768; // large page size; exactly one data page is asserted below; NOTE(review): global config is set here, not restored within this test
+
+ PrimaryKeyIndexBuilder builder(file_writer.get(), 0);
+ static_cast<void>(builder.init()); // return value deliberately discarded
+ size_t num_rows = 0;
+ std::vector<std::string> keys {"00000", "00002", "00004", "00006", "00008",
+ "00010", "00012", "00014", "00016",
"00018"};
+ for (const std::string& key : keys) {
+ static_cast<void>(builder.add_item(key)); // return value deliberately discarded
+ num_rows++;
+ }
+ EXPECT_EQ("00000", builder.min_key().to_string());
+ EXPECT_EQ("00018", builder.max_key().to_string());
+ EXPECT_EQ(builder.size(), 2 * 5 * 5); // 50, which matches 10 keys x 5 characters
+ EXPECT_EQ(builder.data_page_num(), 1); // all keys are expected to fit in a single data page
+ segment_v2::PrimaryKeyIndexMetaPB index_meta;
+ EXPECT_TRUE(builder.finalize(&index_meta)); // NOTE(review): checked without .ok(), unlike the other calls here -- confirm this is intended
+ EXPECT_EQ(builder.disk_size(), file_writer->bytes_appended());
+ EXPECT_TRUE(file_writer->close().ok());
+ EXPECT_EQ(num_rows, builder.num_rows());
+
+ PrimaryKeyIndexReader index_reader; // read side: reopen the file that was just written
+ io::FileReaderSPtr file_reader;
+ EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
+ EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok());
+ EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok());
+ EXPECT_EQ(num_rows, index_reader.num_rows());
+
+ std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
+ EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok());
+ bool exact_match = false;
+ uint32_t row_id;
+ for (size_t i = 0; i < keys.size(); i++) { // every inserted key: present, exact match, ordinal equals insertion index
+ bool exists = index_reader.check_present(keys[i]);
+ EXPECT_TRUE(exists);
+ auto status = index_iterator->seek_at_or_after(&keys[i], &exact_match);
+ EXPECT_TRUE(status.ok());
+ EXPECT_TRUE(exact_match);
+ row_id = index_iterator->get_current_ordinal();
+ EXPECT_EQ(i, row_id);
+ }
+
+ std::vector<std::string> non_exist_keys {"00001", "00003", "00005",
"00007", "00009",
+ "00011", "00013", "00015",
"00017"};
+ for (size_t i = 0; i < non_exist_keys.size(); i++) { // odd keys sit between inserted keys: absent, and the seek lands on the next larger key
+ Slice slice(non_exist_keys[i]);
+ bool exists = index_reader.check_present(slice);
+ EXPECT_FALSE(exists);
+ auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+ EXPECT_TRUE(status.ok());
+ EXPECT_FALSE(exact_match);
+ row_id = index_iterator->get_current_ordinal();
+ EXPECT_EQ(i + 1, row_id); // non_exist_keys[i] sorts just before keys[i + 1]
+ }
+ {
+ string key("00019"); // sorts after the max key "00018"
+ Slice slice(key);
+ bool exists = index_reader.check_present(slice);
+ EXPECT_FALSE(exists);
+ auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+ EXPECT_FALSE(exact_match);
+ EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>()); // seeking past the last key is expected to report ENTRY_NOT_FOUND
+ }
+}
} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]