This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 0348b336018 [fix](parquet-reader) Fixed the issue of excessive
scanning data in late materialization case of parquet reader (#46121)
0348b336018 is described below
commit 0348b336018644953aea8e71fa636239c3096dca
Author: Qi Chen <[email protected]>
AuthorDate: Mon Dec 30 22:48:53 2024 +0800
[fix](parquet-reader) Fixed the issue of excessive scanning data in late
materialization case of parquet reader (#46121)
### What problem does this PR solve?
Related PR: #40641
Problem Summary:
[Fix](parquet-reader) Fixed excessive data scanning in the
late-materialization case of the parquet reader. The issue was introduced
by #40641 and occurs in scenarios with particularly high filtering rates.
---
be/src/vec/exec/format/parquet/vparquet_group_reader.cpp | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index a9854b53f3b..770ed1f02ac 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -522,16 +522,18 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t
batch_size, size_t* re
Block::erase_useless_column(block, origin_column_num);
if (!pre_eof) {
- if (pre_raw_read_rows >= config::doris_scanner_row_num) {
- break;
- }
// If continuous batches are skipped, we can cache them to
skip a whole page
_cached_filtered_rows += pre_read_rows;
+ if (pre_raw_read_rows >= config::doris_scanner_row_num) {
+ *read_rows = 0;
+ _convert_dict_cols_to_string_cols(block);
+ return Status::OK();
+ }
} else { // pre_eof
// If filter_map_ptr->filter_all() and pre_eof, we can skip
whole row group.
*read_rows = 0;
*batch_eof = true;
- _lazy_read_filtered_rows += pre_read_rows;
+ _lazy_read_filtered_rows += (pre_read_rows +
_cached_filtered_rows);
_convert_dict_cols_to_string_cols(block);
return Status::OK();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]