This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 49f4c8ce37 [parquet] Simplify row ranges when read empty columns
49f4c8ce37 is described below

commit 49f4c8ce376c40945f00c8ecc06bb7faa7f12967
Author: JingsongLi <[email protected]>
AuthorDate: Mon Aug 18 19:27:17 2025 +0800

    [parquet] Simplify row ranges when read empty columns
---
 .../apache/parquet/hadoop/ParquetFileReader.java   | 62 ++++++++--------------
 1 file changed, 22 insertions(+), 40 deletions(-)

diff --git a/paimon-format/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/paimon-format/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
index cdf723c524..16523e6fe3 100644
--- a/paimon-format/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
+++ b/paimon-format/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
@@ -98,7 +98,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Objects;
-import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
@@ -803,57 +802,40 @@ public class ParquetFileReader implements Closeable {
 
         RowRanges rowRanges = blockRowRanges.get(blockIndex);
         if (rowRanges == null) {
-            BlockMetaData block = blocks.get(blockIndex);
-            rowRanges = RowRanges.createSingle(block.getRowCount());
-            if (selection != null) {
-                RowRanges result = calculateRowRanges(blockIndex, selection);
-                rowRanges = RowRanges.intersection(result, rowRanges);
-            }
-
-            if (filteringRequired) {
-                RowRanges result =
-                        ColumnIndexFilter.calculateRowRanges(
-                                options.getRecordFilter(),
-                                getColumnIndexStore(blockIndex),
-                                paths.keySet(),
-                                block.getRowCount());
-                rowRanges = RowRanges.intersection(result, rowRanges);
-            }
+            rowRanges = calculateRowRanges(blockIndex);
             blockRowRanges.set(blockIndex, rowRanges);
         }
         return rowRanges;
     }
 
-    private RowRanges calculateRowRanges(int blockIndex, RoaringBitmap32 selection) {
-        List<OffsetIndex> offsets;
+    private RowRanges calculateRowRanges(int blockIndex) {
         BlockMetaData block = blocks.get(blockIndex);
-        if (paths.isEmpty()) {
-            Optional<ColumnChunkMetaData> first = block.getColumns().stream().findFirst();
-            if (first.isPresent()) {
-                ColumnPath path = first.get().getPath();
-                OffsetIndex index =
-                        ColumnIndexStoreImpl.create(this, block, Collections.singleton(path))
-                                .getOffsetIndex(path);
-                offsets = Collections.singletonList(index);
-            } else {
-                offsets = Collections.emptyList();
-            }
-        } else {
+        RowRanges rowRanges = RowRanges.createSingle(block.getRowCount());
+        if (selection != null) {
             ColumnIndexStore store = getColumnIndexStore(blockIndex);
-            offsets =
-                    paths.keySet().stream().map(store::getOffsetIndex).collect(Collectors.toList());
-        }
-
-        long rowCount = block.getRowCount();
-        long rowIndexOffset = block.getRowIndexOffset();
-        RowRanges rowRanges = RowRanges.createSingle(rowCount);
-        for (OffsetIndex offset : offsets) {
-            if (offset != null) {
+            List<OffsetIndex> offsets =
+                    paths.keySet().stream()
+                            .map(store::getOffsetIndex)
+                            .filter(Objects::nonNull)
+                            .collect(Collectors.toList());
+            long rowCount = block.getRowCount();
+            long rowIndexOffset = block.getRowIndexOffset();
+            for (OffsetIndex offset : offsets) {
+                // avoiding creating too many ranges, just filter columns pages
                 RowRanges result = RowRanges.create(rowCount, rowIndexOffset, offset, selection);
                 rowRanges = RowRanges.intersection(result, rowRanges);
             }
         }
 
+        if (FilterCompat.isFilteringRequired(options.getRecordFilter())) {
+            RowRanges result =
+                    ColumnIndexFilter.calculateRowRanges(
+                            options.getRecordFilter(),
+                            getColumnIndexStore(blockIndex),
+                            paths.keySet(),
+                            block.getRowCount());
+            rowRanges = RowRanges.intersection(result, rowRanges);
+        }
         return rowRanges;
     }
 

Reply via email to