This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new 3fdd7fea35e branch-4.1: [opt](storage) disable dict encoding in row store columns #63438 (#63465)
3fdd7fea35e is described below

commit 3fdd7fea35ec694397e01232885865050e9fec49
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri May 22 09:31:14 2026 +0800

    branch-4.1: [opt](storage) disable dict encoding in row store columns #63438 (#63465)
    
    Cherry-picked from #63438
    
    Co-authored-by: lihangyu <[email protected]>
---
 be/src/storage/segment/segment_writer.cpp          |  6 ++
 be/src/storage/segment/vertical_segment_writer.cpp |  6 ++
 .../storage/segment/column_meta_accessor_test.cpp  | 71 ++++++++++++++++++++++
 3 files changed, 83 insertions(+)

diff --git a/be/src/storage/segment/segment_writer.cpp b/be/src/storage/segment/segment_writer.cpp
index 521c3ca2e84..426c7c2491e 100644
--- a/be/src/storage/segment/segment_writer.cpp
+++ b/be/src/storage/segment/segment_writer.cpp
@@ -296,6 +296,12 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co
         auto page_size = _tablet_schema->row_store_page_size();
         opts.data_page_size =
                 (page_size > 0) ? page_size : segment_v2::ROW_STORE_PAGE_SIZE_DEFAULT_VALUE;
+        // Row store data is already serialized as a single blob. Keep it on plain pages
+        // to avoid introducing dictionary pages for the hidden row store column.
+        opts.meta->set_encoding(_tablet_schema->binary_plain_encoding_default_impl() ==
+                                                BinaryPlainEncodingTypePB::BINARY_PLAIN_ENCODING_V2
+                                        ? PLAIN_ENCODING_V2
+                                        : PLAIN_ENCODING);
     }
 
     opts.rowset_ctx = _opts.rowset_ctx;
diff --git a/be/src/storage/segment/vertical_segment_writer.cpp b/be/src/storage/segment/vertical_segment_writer.cpp
index c9ac91128a9..a95dc380c55 100644
--- a/be/src/storage/segment/vertical_segment_writer.cpp
+++ b/be/src/storage/segment/vertical_segment_writer.cpp
@@ -296,6 +296,12 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
         auto page_size = _tablet_schema->row_store_page_size();
         opts.data_page_size =
                 (page_size > 0) ? page_size : segment_v2::ROW_STORE_PAGE_SIZE_DEFAULT_VALUE;
+        // Row store data is already serialized as a single blob. Keep it on plain pages
+        // to avoid introducing dictionary pages for the hidden row store column.
+        opts.meta->set_encoding(_tablet_schema->binary_plain_encoding_default_impl() ==
+                                                BinaryPlainEncodingTypePB::BINARY_PLAIN_ENCODING_V2
+                                        ? PLAIN_ENCODING_V2
+                                        : PLAIN_ENCODING);
     }
 
     opts.rowset_ctx = _opts.rowset_ctx;
diff --git a/be/test/storage/segment/column_meta_accessor_test.cpp b/be/test/storage/segment/column_meta_accessor_test.cpp
index 25443db7bc0..bdcdd39d8c8 100644
--- a/be/test/storage/segment/column_meta_accessor_test.cpp
+++ b/be/test/storage/segment/column_meta_accessor_test.cpp
@@ -24,6 +24,7 @@
 #include <string>
 #include <vector>
 
+#include "common/consts.h"
 #include "core/field.h"
 #include "io/fs/local_file_system.h"
 #include "storage/segment/segment.h"
@@ -45,6 +46,19 @@ std::string make_test_file_path(const std::string& file_name) {
     return std::string(kTestDir) + "/" + file_name;
 }
 
+TabletColumnPtr create_row_store_test_column(int32_t id) {
+    auto column = std::make_shared<TabletColumn>();
+    column->_unique_id = id;
+    column->_col_name = BeConsts::ROW_STORE_COL;
+    column->_type = FieldType::OLAP_FIELD_TYPE_STRING;
+    column->_is_key = false;
+    column->_is_nullable = true;
+    column->_aggregation = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE;
+    column->_length = 2147483643;
+    column->_index_length = 4;
+    return column;
+}
+
 // Helper to write segment footer trailer (footer + metadata)
 Status append_footer_trailer(io::FileWriter* fw, SegmentFooterPB* footer) {
     std::string footer_buf;
@@ -677,6 +691,63 @@ TEST(ColumnMetaAccessorTest, FooterSizeWithManyColumnsExternalVsInline) {
     EXPECT_LT(external_footer_size, inline_footer_size / 10);
 }
 
+TEST(ColumnMetaAccessorTest, RowStoreColumnDoesNotUseDictEncoding) {
+    constexpr int32_t kRowStoreUid = 1;
+
+    auto fs = io::global_local_filesystem();
+    static_cast<void>(fs->delete_directory(kTestDir));
+    ASSERT_TRUE(fs->create_directory(kTestDir).ok());
+
+    auto key_column = std::make_shared<TabletColumn>();
+    key_column->_unique_id = 0;
+    key_column->_col_name = "k0";
+    key_column->_type = FieldType::OLAP_FIELD_TYPE_INT;
+    key_column->_is_key = true;
+    key_column->_is_nullable = false;
+    key_column->_length = 4;
+    key_column->_index_length = 4;
+
+    std::vector<TabletColumnPtr> columns;
+    columns.emplace_back(std::move(key_column));
+    columns.emplace_back(create_row_store_test_column(kRowStoreUid));
+
+    auto tablet_schema = create_schema(columns, UNIQUE_KEYS);
+    tablet_schema->set_binary_plain_encoding_default_impl(
+            BinaryPlainEncodingTypePB::BINARY_PLAIN_ENCODING_V2);
+
+    SegmentWriterOptions opts;
+    opts.enable_unique_key_merge_on_write = false;
+
+    auto generator = [](size_t rid, int cid, Field& field) {
+        if (cid == 0) {
+            field = Field::create_field<TYPE_INT>(static_cast<int32_t>(rid));
+            return;
+        }
+        field = Field::create_field<TYPE_STRING>("row-store-" + std::to_string(rid));
+    };
+
+    std::shared_ptr<Segment> segment;
+    std::string segment_path;
+    build_segment(opts, tablet_schema,
+                  /*segment_id=*/0, tablet_schema,
+                  /*nrows=*/8, generator, &segment, std::string(kTestDir), &segment_path);
+    ASSERT_NE(segment, nullptr);
+
+    io::FileReaderSPtr reader;
+    io::FileReaderOptions reader_opts;
+    ASSERT_TRUE(fs->open_file(segment_path, &reader, &reader_opts).ok());
+
+    SegmentFooterPB footer;
+    ASSERT_TRUE(read_footer_from_file(reader, &footer).ok());
+    ASSERT_EQ(2, footer.columns_size());
+
+    const auto& row_store_meta = footer.columns(1);
+    EXPECT_EQ(kRowStoreUid, row_store_meta.unique_id());
+    EXPECT_EQ(static_cast<int>(FieldType::OLAP_FIELD_TYPE_STRING), row_store_meta.type());
+    EXPECT_EQ(PLAIN_ENCODING_V2, row_store_meta.encoding());
+    EXPECT_NE(DICT_ENCODING, row_store_meta.encoding());
+}
+
 // Test concurrent access (thread safety not guaranteed by ColumnMetaAccessor itself,
 // but test that multiple sequential calls work correctly)
 TEST(ColumnMetaAccessorTest, MultipleSequentialAccesses) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org

Reply via email to