This is an automated email from the ASF dual-hosted git repository.

Gabriel39 pushed a commit to branch mc-test-branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/mc-test-branch-4.1 by this push:
     new 123f165483a Fix parquet row group reader lifetime issues (#63101)
123f165483a is described below

commit 123f165483a51c707921ff180cc4ea97d00470a3
Author: Gabriel <[email protected]>
AuthorDate: Sat May 9 15:43:37 2026 +0800

    Fix parquet row group reader lifetime issues (#63101)
---
 be/src/format/parquet/vparquet_group_reader.cpp | 5 +++--
 be/src/format/parquet/vparquet_group_reader.h   | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/be/src/format/parquet/vparquet_group_reader.cpp b/be/src/format/parquet/vparquet_group_reader.cpp
index 6531ce9dd8f..1c8aa5ee390 100644
--- a/be/src/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/format/parquet/vparquet_group_reader.cpp
@@ -162,8 +162,9 @@ RowGroupReader::RowGroupReader(io::FileReaderSPtr file_reader,
           _filter_column_ids(filter_column_ids) {}
 
 RowGroupReader::~RowGroupReader() {
-    _column_readers.clear();
-    _obj_pool->clear();
+    if (_obj_pool != nullptr) {
+        _obj_pool->clear();
+    }
 }
 
 Status RowGroupReader::init(
diff --git a/be/src/format/parquet/vparquet_group_reader.h b/be/src/format/parquet/vparquet_group_reader.h
index 6d514500a57..208d3995b90 100644
--- a/be/src/format/parquet/vparquet_group_reader.h
+++ b/be/src/format/parquet/vparquet_group_reader.h
@@ -252,8 +252,6 @@ private:
     Status _append_iceberg_rowid_column(Block* block, size_t read_rows, bool is_current_row_ids);
 
     io::FileReaderSPtr _file_reader;
-    std::unordered_map<std::string, std::unique_ptr<ParquetColumnReader>>
-            _column_readers; // table_column_name
     std::vector<std::string> _read_table_columns;
 
     const int32_t _row_group_id;
@@ -265,6 +263,9 @@ private:
     std::shared_ptr<RowLineageColumns> _row_lineage_columns;
     // merge the row ranges generated from page index and position delete.
     RowRanges _read_ranges;
+    // ParquetColumnReader keeps a reference to _read_ranges, so readers must be destroyed first.
+    std::unordered_map<std::string, std::unique_ptr<ParquetColumnReader>>
+            _column_readers; // table_column_name
 
     LazyReadContext _lazy_read_ctx;
     int64_t _lazy_read_filtered_rows = 0;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to