This is an automated email from the ASF dual-hosted git repository.

jerryjing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new f889cad694 [core][format] Format Table plan partitions should ignore hidden & illegal dirs (#6522)
f889cad694 is described below

commit f889cad694bb3c4579e9b3ab0c75ca627fd71806
Author: Jingsong Lee <[email protected]>
AuthorDate: Tue Nov 4 12:30:10 2025 +0800

    [core][format] Format Table plan partitions should ignore hidden & illegal dirs (#6522)
    
    * [core][format] Format Table plan partitions should ignore hidden & illegal dirs
---
 .../paimon/table/format/FormatTableScan.java       | 13 ++--
 .../apache/paimon/utils/PartitionPathUtils.java    | 25 +++++--
 .../paimon/table/format/FormatTableScanTest.java   | 79 +++++++++++++++++++++-
 3 files changed, 102 insertions(+), 15 deletions(-)

diff --git a/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java b/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java
index f9a3f0067c..ac4c29d134 100644
--- a/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java
+++ b/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java
@@ -151,6 +151,7 @@ public class FormatTableScan implements InnerTableScan {
     }
 
     private List<Pair<LinkedHashMap<String, String>, Path>> findPartitions() {
+        boolean onlyValueInPath = coreOptions.formatTablePartitionOnlyValueInPath();
         if (partitionFilter instanceof MultiplePartitionPredicate) {
             // generate partitions directly
             Set<BinaryRow> partitions = ((MultiplePartitionPredicate) partitionFilter).partitions();
@@ -160,7 +161,7 @@ public class FormatTableScan implements InnerTableScan {
                     table.defaultPartName(),
                     new Path(table.location()),
                     partitions,
-                    coreOptions.formatTablePartitionOnlyValueInPath());
+                    onlyValueInPath);
         } else {
             // search paths
             Pair<Path, Integer> scanPathAndLevel =
@@ -169,15 +170,13 @@ public class FormatTableScan implements InnerTableScan {
                             table.partitionKeys(),
                             partitionFilter,
                             table.partitionType(),
-                            coreOptions.formatTablePartitionOnlyValueInPath());
-            Path scanPath = scanPathAndLevel.getLeft();
-            int level = scanPathAndLevel.getRight();
+                            onlyValueInPath);
             return searchPartSpecAndPaths(
                     table.fileIO(),
-                    scanPath,
-                    level,
+                    scanPathAndLevel.getLeft(),
+                    scanPathAndLevel.getRight(),
                     table.partitionKeys(),
-                    coreOptions.formatTablePartitionOnlyValueInPath());
+                    onlyValueInPath);
         }
     }
 
diff --git a/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java b/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java
index 1aade5fbda..880e4dfe67 100644
--- a/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java
+++ b/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java
@@ -24,8 +24,6 @@ import org.apache.paimon.fs.Path;
 import org.apache.paimon.types.DataField;
 import org.apache.paimon.types.RowType;
 
-import javax.annotation.Nullable;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.BitSet;
@@ -272,8 +270,8 @@ public class PartitionPathUtils {
             FileIO fileIO,
             Path path,
             int partitionNumber,
-            @Nullable List<String> partitionKeys,
-            boolean enablePartitionOnlyValueInPath) {
+            List<String> partitionKeys,
+            boolean onlyValueInPath) {
         FileStatus[] generatedParts = getFileStatusRecurse(path, partitionNumber, fileIO);
         List<Pair<LinkedHashMap<String, String>, Path>> ret = new ArrayList<>();
         for (FileStatus part : generatedParts) {
@@ -281,14 +279,19 @@ public class PartitionPathUtils {
             if (isHiddenFile(part)) {
                 continue;
             }
-            if (enablePartitionOnlyValueInPath && partitionKeys != null) {
+            if (onlyValueInPath) {
                 ret.add(
                         Pair.of(
                                 extractPartitionSpecFromPathOnlyValue(
                                         part.getPath(), partitionKeys),
                                 part.getPath()));
             } else {
-                ret.add(Pair.of(extractPartitionSpecFromPath(part.getPath()), part.getPath()));
+                LinkedHashMap<String, String> spec = extractPartitionSpecFromPath(part.getPath());
+                if (spec.size() != partitionKeys.size()) {
+                    // illegal path, for example: /path/to/table/tmp/unknown, path without "="
+                    continue;
+                }
+                ret.add(Pair.of(spec, part.getPath()));
             }
         }
         return ret;
@@ -314,6 +317,10 @@ public class PartitionPathUtils {
             int expectLevel,
             List<FileStatus> results)
             throws IOException {
+        if (isHiddenFile(fileStatus.getPath())) {
+            return;
+        }
+
         if (expectLevel == level) {
             results.add(fileStatus);
             return;
@@ -327,7 +334,11 @@ public class PartitionPathUtils {
     }
 
     private static boolean isHiddenFile(FileStatus fileStatus) {
-        String name = fileStatus.getPath().getName();
+        return isHiddenFile(fileStatus.getPath());
+    }
+
+    private static boolean isHiddenFile(Path path) {
+        String name = path.getName();
         return name.startsWith("_") || name.startsWith(".");
     }
 }
diff --git a/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java b/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java
index 5bf13d9209..ac49164ea8 100644
--- a/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java
@@ -161,7 +161,7 @@ public class FormatTableScanTest {
                         partitionType,
                         enablePartitionValueOnly);
 
-        // Should optimize to specific partition path for first key
+        // Should not be optimized because of greater than
         assertThat(result.getLeft()).isEqualTo(tableLocation);
         assertThat(result.getRight()).isEqualTo(2);
 
@@ -202,6 +202,7 @@ public class FormatTableScanTest {
                         partitionType,
                         enablePartitionValueOnly);
         String partitionPath = enablePartitionValueOnly ? "2023/12" : "year=2023/month=12";
+
         // Should optimize to specific partition path
         assertThat(result.getLeft().toString()).isEqualTo(tableLocation + partitionPath);
         assertThat(result.getRight()).isEqualTo(0);
@@ -265,6 +266,82 @@ public class FormatTableScanTest {
         assertThat(searched.size()).isEqualTo(1);
     }
 
+    @TestTemplate
+    void testNoOptimizationWithSecondEquality() throws IOException {
+        Path tableLocation = new Path(tmpPath.toUri());
+        // Create equality predicate for only the second partition key
+        PredicateBuilder builder = new PredicateBuilder(partitionType);
+        Predicate predicate =
+                PredicateBuilder.and(builder.greaterOrEqual(0, 2023), builder.equal(1, 12));
+        PartitionPredicate partitionFilter =
+                PartitionPredicate.fromPredicate(partitionType, predicate);
+
+        Pair<Path, Integer> result =
+                FormatTableScan.computeScanPathAndLevel(
+                        tableLocation,
+                        partitionKeys,
+                        partitionFilter,
+                        partitionType,
+                        enablePartitionValueOnly);
+
+        // Should not optimize with second equality filter
+        assertThat(result.getLeft()).isEqualTo(tableLocation);
+        assertThat(result.getRight()).isEqualTo(2);
+
+        // test searchPartSpecAndPaths
+        LocalFileIO fileIO = LocalFileIO.create();
+        String partitionPath = enablePartitionValueOnly ? "2023/12" : "year=2023/month=12";
+        fileIO.mkdirs(new Path(tableLocation, partitionPath));
+        List<Pair<LinkedHashMap<String, String>, Path>> searched =
+                searchPartSpecAndPaths(
+                        fileIO,
+                        result.getLeft(),
+                        result.getRight(),
+                        partitionKeys,
+                        enablePartitionValueOnly);
+        LinkedHashMap<String, String> expectPartitionSpec =
+                new LinkedHashMap<>(partitionKeys.size());
+        expectPartitionSpec.put("year", "2023");
+        expectPartitionSpec.put("month", "12");
+        assertThat(searched.get(0).getLeft()).isEqualTo(expectPartitionSpec);
+        assertThat(searched.size()).isEqualTo(1);
+    }
+
+    @TestTemplate
+    void testSkipIllegalPath() throws IOException {
+        Path tableLocation = new Path(tmpPath.toUri());
+        PartitionPredicate partitionFilter = PartitionPredicate.fromPredicate(partitionType, null);
+        Pair<Path, Integer> result =
+                FormatTableScan.computeScanPathAndLevel(
+                        tableLocation,
+                        partitionKeys,
+                        partitionFilter,
+                        partitionType,
+                        enablePartitionValueOnly);
+
+        LocalFileIO fileIO = LocalFileIO.create();
+        String illegalPath =
+                enablePartitionValueOnly
+                        ? "_unknown-year/unknown-month"
+                        : "unknown-year/unknown-month";
+        fileIO.mkdirs(new Path(tableLocation, illegalPath));
+        String partitionPath = enablePartitionValueOnly ? "2023/12" : "year=2023/month=12";
+        fileIO.mkdirs(new Path(tableLocation, partitionPath));
+        List<Pair<LinkedHashMap<String, String>, Path>> searched =
+                searchPartSpecAndPaths(
+                        fileIO,
+                        result.getLeft(),
+                        result.getRight(),
+                        partitionKeys,
+                        enablePartitionValueOnly);
+        LinkedHashMap<String, String> expectPartitionSpec =
+                new LinkedHashMap<>(partitionKeys.size());
+        expectPartitionSpec.put("year", "2023");
+        expectPartitionSpec.put("month", "12");
+        assertThat(searched.get(0).getLeft()).isEqualTo(expectPartitionSpec);
+        assertThat(searched.size()).isEqualTo(1);
+    }
+
     @TestTemplate
     void testComputeScanPathAndLevel() {
         Path tableLocation = new Path(tmpPath.toUri());

Reply via email to