This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new d159a61a9d9 [branch-4.1](variant) Skip full footer scan when constructing VariantStatsCaculator (#62819) (#63072)
d159a61a9d9 is described below

commit d159a61a9d9e924006a2afb9fd2146a62ebbe4df
Author: Chenyang Sun <[email protected]>
AuthorDate: Sat May 9 15:30:27 2026 +0800

    [branch-4.1](variant) Skip full footer scan when constructing VariantStatsCaculator (#62819) (#63072)
    
    pick from master #62819
---
 be/src/storage/segment/segment_writer.cpp          |  7 +++--
 .../storage/segment/variant_stats_calculator.cpp   | 10 ++++---
 be/src/storage/segment/variant_stats_calculator.h  |  8 +++--
 .../segment/variant_stats_calculator_test.cpp      | 35 ++++++++++++++++++++++
 4 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/be/src/storage/segment/segment_writer.cpp b/be/src/storage/segment/segment_writer.cpp
index 976ab0c2e2e..521c3ca2e84 100644
--- a/be/src/storage/segment/segment_writer.cpp
+++ b/be/src/storage/segment/segment_writer.cpp
@@ -330,11 +330,14 @@ Status SegmentWriter::init(const std::vector<uint32_t>& col_ids, bool has_key) {
         _opts.compression_type = _tablet_schema->compression_type();
     }
 
+    // Vertical compaction calls init() multiple times against the same 
writer; the footer accumulates entries
+    // across calls, so this init()'s slice of footer columns starts at the 
current size.
+    const int variant_stats_footer_offset = _footer.columns_size();
     RETURN_IF_ERROR(_create_writers(_tablet_schema, col_ids));
 
     // Initialize variant statistics calculator
-    _variant_stats_calculator =
-            std::make_unique<VariantStatsCaculator>(&_footer, _tablet_schema, 
col_ids);
+    _variant_stats_calculator = std::make_unique<VariantStatsCaculator>(
+            &_footer, _tablet_schema, col_ids, variant_stats_footer_offset);
 
     // we don't need the short key index for unique key merge on write table.
     if (_has_key) {
diff --git a/be/src/storage/segment/variant_stats_calculator.cpp b/be/src/storage/segment/variant_stats_calculator.cpp
index b1bffb7673d..bc160282c18 100644
--- a/be/src/storage/segment/variant_stats_calculator.cpp
+++ b/be/src/storage/segment/variant_stats_calculator.cpp
@@ -30,10 +30,12 @@ namespace doris::segment_v2 {
 
 VariantStatsCaculator::VariantStatsCaculator(SegmentFooterPB* footer,
                                              TabletSchemaSPtr tablet_schema,
-                                             const std::vector<uint32_t>& 
column_ids)
+                                             const std::vector<uint32_t>& 
column_ids,
+                                             int footer_column_offset)
         : _footer(footer), _tablet_schema(tablet_schema), 
_column_ids(column_ids) {
-    // Build the path to footer index mapping during initialization
-    for (int i = 0; i < _footer->columns_size(); ++i) {
+    // Only walk this init()'s slice of footer entries; earlier init() calls 
(vertical compaction's previous
+    // column groups) are not addressable via `column_ids` and would only 
inflate this scan.
+    for (int i = footer_column_offset; i < _footer->columns_size(); ++i) {
         const auto& column = _footer->columns(i);
         // path that need to record stats
         if (column.has_column_path_info() &&
@@ -119,4 +121,4 @@ void VariantStatsCaculator::_calculate_sub_column_stats(const IColumn& column,
 
 #include "common/compile_check_end.h"
 
-} // namespace doris::segment_v2
\ No newline at end of file
+} // namespace doris::segment_v2
diff --git a/be/src/storage/segment/variant_stats_calculator.h b/be/src/storage/segment/variant_stats_calculator.h
index e096697131a..4caf07540a2 100644
--- a/be/src/storage/segment/variant_stats_calculator.h
+++ b/be/src/storage/segment/variant_stats_calculator.h
@@ -29,8 +29,12 @@ namespace doris::segment_v2 {
 
 class VariantStatsCaculator {
 public:
+    // `footer_column_offset` is the index of the first footer entry that 
belongs to this init()'s `column_ids`.
+    // Required because SegmentWriter::init() can be invoked multiple times 
(vertical compaction) against
+    // an ever-growing footer; without the offset every additional init() 
would re-scan the whole footer.
     explicit VariantStatsCaculator(SegmentFooterPB* footer, TabletSchemaSPtr 
tablet_schema,
-                                   const std::vector<uint32_t>& column_ids);
+                                   const std::vector<uint32_t>& column_ids,
+                                   int footer_column_offset = 0);
 
     // Calculate variant statistics for the given column and block
     Status calculate_variant_stats(const Block* block, size_t row_pos, size_t 
num_rows);
@@ -54,4 +58,4 @@ private:
                                      size_t row_pos, size_t num_rows);
 };
 
-} // namespace doris::segment_v2
\ No newline at end of file
+} // namespace doris::segment_v2
diff --git a/be/test/storage/segment/variant_stats_calculator_test.cpp b/be/test/storage/segment/variant_stats_calculator_test.cpp
index ffdfc230901..66c3050370c 100644
--- a/be/test/storage/segment/variant_stats_calculator_test.cpp
+++ b/be/test/storage/segment/variant_stats_calculator_test.cpp
@@ -447,4 +447,39 @@ TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithExtendedSchema) {
     EXPECT_TRUE(status.ok());
 }
 
+TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithFooterOffset) {
+    // Vertical compaction calls SegmentWriter::init() multiple times against
+    // the same writer (key columns first, then each value-column group). The
+    // footer accumulates entries across calls, so the calculator built for the
+    // second init() must only index its own slice — not the leftover entries
+    // from the first init(). The offset tells the constructor where its slice
+    // starts and also where stats results should land.
+    add_footer_column_with_path(1, "stale_from_prev_init"); // pre-existing
+    add_footer_column_with_path(1, "another_stale_entry");  // pre-existing
+    const int footer_offset = _footer->columns_size();
+    add_footer_column_with_path(1, "sub_column"); // belongs to this init()
+
+    TabletColumn sub_column = create_variant_column(2, "variant.sub_column", 
1, "sub_column");
+    _tablet_schema->append_column(sub_column);
+
+    std::vector<uint32_t> column_ids = {0};
+    VariantStatsCaculator calculator(_footer.get(), _tablet_schema, 
column_ids, footer_offset);
+
+    Block block;
+    auto nullable_column = create_nullable_column({false, true, false}, {"a", 
"", "c"});
+    block.insert({std::move(nullable_column),
+                  
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()),
+                  "sub_column"});
+
+    auto status = calculator.calculate_variant_stats(&block, 0, 3);
+    EXPECT_TRUE(status.ok());
+
+    // Stats land on this init()'s slice, not the pre-existing entries — proves
+    // we ignored the stale entries even though they share parent_unique_id=1
+    // and the same path keys would otherwise collide in the index map.
+    EXPECT_EQ(_footer->columns(0).none_null_size(), 0);
+    EXPECT_EQ(_footer->columns(1).none_null_size(), 0);
+    EXPECT_EQ(_footer->columns(footer_offset).none_null_size(), 2);
+}
+
 } // namespace doris::segment_v2
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to