This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new 3fdd7fea35e branch-4.1: [opt](storage) disable dict encoding in row store columns #63438 (#63465)
3fdd7fea35e is described below
commit 3fdd7fea35ec694397e01232885865050e9fec49
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri May 22 09:31:14 2026 +0800
branch-4.1: [opt](storage) disable dict encoding in row store columns #63438 (#63465)
Cherry-picked from #63438
Co-authored-by: lihangyu <[email protected]>
---
be/src/storage/segment/segment_writer.cpp | 6 ++
be/src/storage/segment/vertical_segment_writer.cpp | 6 ++
.../storage/segment/column_meta_accessor_test.cpp | 71 ++++++++++++++++++++++
3 files changed, 83 insertions(+)
diff --git a/be/src/storage/segment/segment_writer.cpp b/be/src/storage/segment/segment_writer.cpp
index 521c3ca2e84..426c7c2491e 100644
--- a/be/src/storage/segment/segment_writer.cpp
+++ b/be/src/storage/segment/segment_writer.cpp
@@ -296,6 +296,12 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co
auto page_size = _tablet_schema->row_store_page_size();
opts.data_page_size =
(page_size > 0) ? page_size : segment_v2::ROW_STORE_PAGE_SIZE_DEFAULT_VALUE;
+ // Row store data is already serialized as a single blob. Keep it on plain pages
+ // to avoid introducing dictionary pages for the hidden row store column.
+ opts.meta->set_encoding(_tablet_schema->binary_plain_encoding_default_impl() ==
+ BinaryPlainEncodingTypePB::BINARY_PLAIN_ENCODING_V2
+ ? PLAIN_ENCODING_V2
+ : PLAIN_ENCODING);
}
opts.rowset_ctx = _opts.rowset_ctx;
diff --git a/be/src/storage/segment/vertical_segment_writer.cpp b/be/src/storage/segment/vertical_segment_writer.cpp
index c9ac91128a9..a95dc380c55 100644
--- a/be/src/storage/segment/vertical_segment_writer.cpp
+++ b/be/src/storage/segment/vertical_segment_writer.cpp
@@ -296,6 +296,12 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
auto page_size = _tablet_schema->row_store_page_size();
opts.data_page_size =
(page_size > 0) ? page_size : segment_v2::ROW_STORE_PAGE_SIZE_DEFAULT_VALUE;
+ // Row store data is already serialized as a single blob. Keep it on plain pages
+ // to avoid introducing dictionary pages for the hidden row store column.
+ opts.meta->set_encoding(_tablet_schema->binary_plain_encoding_default_impl() ==
+ BinaryPlainEncodingTypePB::BINARY_PLAIN_ENCODING_V2
+ ? PLAIN_ENCODING_V2
+ : PLAIN_ENCODING);
}
opts.rowset_ctx = _opts.rowset_ctx;
diff --git a/be/test/storage/segment/column_meta_accessor_test.cpp b/be/test/storage/segment/column_meta_accessor_test.cpp
index 25443db7bc0..bdcdd39d8c8 100644
--- a/be/test/storage/segment/column_meta_accessor_test.cpp
+++ b/be/test/storage/segment/column_meta_accessor_test.cpp
@@ -24,6 +24,7 @@
#include <string>
#include <vector>
+#include "common/consts.h"
#include "core/field.h"
#include "io/fs/local_file_system.h"
#include "storage/segment/segment.h"
@@ -45,6 +46,19 @@ std::string make_test_file_path(const std::string& file_name) {
return std::string(kTestDir) + "/" + file_name;
}
+TabletColumnPtr create_row_store_test_column(int32_t id) {
+ auto column = std::make_shared<TabletColumn>();
+ column->_unique_id = id;
+ column->_col_name = BeConsts::ROW_STORE_COL;
+ column->_type = FieldType::OLAP_FIELD_TYPE_STRING;
+ column->_is_key = false;
+ column->_is_nullable = true;
+ column->_aggregation = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE;
+ column->_length = 2147483643;
+ column->_index_length = 4;
+ return column;
+}
+
// Helper to write segment footer trailer (footer + metadata)
Status append_footer_trailer(io::FileWriter* fw, SegmentFooterPB* footer) {
std::string footer_buf;
@@ -677,6 +691,63 @@ TEST(ColumnMetaAccessorTest, FooterSizeWithManyColumnsExternalVsInline) {
EXPECT_LT(external_footer_size, inline_footer_size / 10);
}
+TEST(ColumnMetaAccessorTest, RowStoreColumnDoesNotUseDictEncoding) {
+ constexpr int32_t kRowStoreUid = 1;
+
+ auto fs = io::global_local_filesystem();
+ static_cast<void>(fs->delete_directory(kTestDir));
+ ASSERT_TRUE(fs->create_directory(kTestDir).ok());
+
+ auto key_column = std::make_shared<TabletColumn>();
+ key_column->_unique_id = 0;
+ key_column->_col_name = "k0";
+ key_column->_type = FieldType::OLAP_FIELD_TYPE_INT;
+ key_column->_is_key = true;
+ key_column->_is_nullable = false;
+ key_column->_length = 4;
+ key_column->_index_length = 4;
+
+ std::vector<TabletColumnPtr> columns;
+ columns.emplace_back(std::move(key_column));
+ columns.emplace_back(create_row_store_test_column(kRowStoreUid));
+
+ auto tablet_schema = create_schema(columns, UNIQUE_KEYS);
+ tablet_schema->set_binary_plain_encoding_default_impl(
+ BinaryPlainEncodingTypePB::BINARY_PLAIN_ENCODING_V2);
+
+ SegmentWriterOptions opts;
+ opts.enable_unique_key_merge_on_write = false;
+
+ auto generator = [](size_t rid, int cid, Field& field) {
+ if (cid == 0) {
+ field = Field::create_field<TYPE_INT>(static_cast<int32_t>(rid));
+ return;
+ }
+ field = Field::create_field<TYPE_STRING>("row-store-" + std::to_string(rid));
+ };
+
+ std::shared_ptr<Segment> segment;
+ std::string segment_path;
+ build_segment(opts, tablet_schema,
+ /*segment_id=*/0, tablet_schema,
+ /*nrows=*/8, generator, &segment, std::string(kTestDir), &segment_path);
+ ASSERT_NE(segment, nullptr);
+
+ io::FileReaderSPtr reader;
+ io::FileReaderOptions reader_opts;
+ ASSERT_TRUE(fs->open_file(segment_path, &reader, &reader_opts).ok());
+
+ SegmentFooterPB footer;
+ ASSERT_TRUE(read_footer_from_file(reader, &footer).ok());
+ ASSERT_EQ(2, footer.columns_size());
+
+ const auto& row_store_meta = footer.columns(1);
+ EXPECT_EQ(kRowStoreUid, row_store_meta.unique_id());
+ EXPECT_EQ(static_cast<int>(FieldType::OLAP_FIELD_TYPE_STRING), row_store_meta.type());
+ EXPECT_EQ(PLAIN_ENCODING_V2, row_store_meta.encoding());
+ EXPECT_NE(DICT_ENCODING, row_store_meta.encoding());
+}
+
// Test concurrent access (thread safety not guaranteed by ColumnMetaAccessor itself,
// but test that multiple sequential calls work correctly)
TEST(ColumnMetaAccessorTest, MultipleSequentialAccesses) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]