This is an automated email from the ASF dual-hosted git repository.
Gabriel39 pushed a commit to branch mc-test-branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/mc-test-branch-4.1 by this push:
new 123f165483a Fix parquet row group reader lifetime issues (#63101)
123f165483a is described below
commit 123f165483a51c707921ff180cc4ea97d00470a3
Author: Gabriel <[email protected]>
AuthorDate: Sat May 9 15:43:37 2026 +0800
Fix parquet row group reader lifetime issues (#63101)
---
be/src/format/parquet/vparquet_group_reader.cpp | 5 +++--
be/src/format/parquet/vparquet_group_reader.h | 5 +++--
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/be/src/format/parquet/vparquet_group_reader.cpp b/be/src/format/parquet/vparquet_group_reader.cpp
index 6531ce9dd8f..1c8aa5ee390 100644
--- a/be/src/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/format/parquet/vparquet_group_reader.cpp
@@ -162,8 +162,9 @@ RowGroupReader::RowGroupReader(io::FileReaderSPtr file_reader,
_filter_column_ids(filter_column_ids) {}
RowGroupReader::~RowGroupReader() {
- _column_readers.clear();
- _obj_pool->clear();
+ if (_obj_pool != nullptr) {
+ _obj_pool->clear();
+ }
}
Status RowGroupReader::init(
diff --git a/be/src/format/parquet/vparquet_group_reader.h b/be/src/format/parquet/vparquet_group_reader.h
index 6d514500a57..208d3995b90 100644
--- a/be/src/format/parquet/vparquet_group_reader.h
+++ b/be/src/format/parquet/vparquet_group_reader.h
@@ -252,8 +252,6 @@ private:
Status _append_iceberg_rowid_column(Block* block, size_t read_rows, bool is_current_row_ids);
io::FileReaderSPtr _file_reader;
- std::unordered_map<std::string, std::unique_ptr<ParquetColumnReader>>
- _column_readers; // table_column_name
std::vector<std::string> _read_table_columns;
const int32_t _row_group_id;
@@ -265,6 +263,9 @@ private:
std::shared_ptr<RowLineageColumns> _row_lineage_columns;
// merge the row ranges generated from page index and position delete.
RowRanges _read_ranges;
+ // ParquetColumnReader keeps a reference to _read_ranges, so readers must be destroyed first.
+ std::unordered_map<std::string, std::unique_ptr<ParquetColumnReader>>
+ _column_readers; // table_column_name
LazyReadContext _lazy_read_ctx;
int64_t _lazy_read_filtered_rows = 0;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]