This is an automated email from the ASF dual-hosted git repository.
jerryjing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new f889cad694 [core][format] Format Table plan partitions should ignore
hidden & illegal dirs (#6522)
f889cad694 is described below
commit f889cad694bb3c4579e9b3ab0c75ca627fd71806
Author: Jingsong Lee <[email protected]>
AuthorDate: Tue Nov 4 12:30:10 2025 +0800
[core][format] Format Table plan partitions should ignore hidden & illegal
dirs (#6522)
* [core][format] Format Table plan partitions should ignore hidden &
illegal dirs
---
.../paimon/table/format/FormatTableScan.java | 13 ++--
.../apache/paimon/utils/PartitionPathUtils.java | 25 +++++--
.../paimon/table/format/FormatTableScanTest.java | 79 +++++++++++++++++++++-
3 files changed, 102 insertions(+), 15 deletions(-)
diff --git a/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java b/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java
index f9a3f0067c..ac4c29d134 100644
--- a/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java
+++ b/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java
@@ -151,6 +151,7 @@ public class FormatTableScan implements InnerTableScan {
}
private List<Pair<LinkedHashMap<String, String>, Path>> findPartitions() {
+ boolean onlyValueInPath =
coreOptions.formatTablePartitionOnlyValueInPath();
if (partitionFilter instanceof MultiplePartitionPredicate) {
// generate partitions directly
Set<BinaryRow> partitions = ((MultiplePartitionPredicate)
partitionFilter).partitions();
@@ -160,7 +161,7 @@ public class FormatTableScan implements InnerTableScan {
table.defaultPartName(),
new Path(table.location()),
partitions,
- coreOptions.formatTablePartitionOnlyValueInPath());
+ onlyValueInPath);
} else {
// search paths
Pair<Path, Integer> scanPathAndLevel =
@@ -169,15 +170,13 @@ public class FormatTableScan implements InnerTableScan {
table.partitionKeys(),
partitionFilter,
table.partitionType(),
- coreOptions.formatTablePartitionOnlyValueInPath());
- Path scanPath = scanPathAndLevel.getLeft();
- int level = scanPathAndLevel.getRight();
+ onlyValueInPath);
return searchPartSpecAndPaths(
table.fileIO(),
- scanPath,
- level,
+ scanPathAndLevel.getLeft(),
+ scanPathAndLevel.getRight(),
table.partitionKeys(),
- coreOptions.formatTablePartitionOnlyValueInPath());
+ onlyValueInPath);
}
}
diff --git a/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java b/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java
index 1aade5fbda..880e4dfe67 100644
--- a/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java
+++ b/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java
@@ -24,8 +24,6 @@ import org.apache.paimon.fs.Path;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.RowType;
-import javax.annotation.Nullable;
-
import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
@@ -272,8 +270,8 @@ public class PartitionPathUtils {
FileIO fileIO,
Path path,
int partitionNumber,
- @Nullable List<String> partitionKeys,
- boolean enablePartitionOnlyValueInPath) {
+ List<String> partitionKeys,
+ boolean onlyValueInPath) {
FileStatus[] generatedParts = getFileStatusRecurse(path,
partitionNumber, fileIO);
List<Pair<LinkedHashMap<String, String>, Path>> ret = new
ArrayList<>();
for (FileStatus part : generatedParts) {
@@ -281,14 +279,19 @@ public class PartitionPathUtils {
if (isHiddenFile(part)) {
continue;
}
- if (enablePartitionOnlyValueInPath && partitionKeys != null) {
+ if (onlyValueInPath) {
ret.add(
Pair.of(
extractPartitionSpecFromPathOnlyValue(
part.getPath(), partitionKeys),
part.getPath()));
} else {
- ret.add(Pair.of(extractPartitionSpecFromPath(part.getPath()),
part.getPath()));
+ LinkedHashMap<String, String> spec =
extractPartitionSpecFromPath(part.getPath());
+ if (spec.size() != partitionKeys.size()) {
+ // illegal path, for example: /path/to/table/tmp/unknown, path without "="
+ continue;
+ }
+ ret.add(Pair.of(spec, part.getPath()));
}
}
return ret;
@@ -314,6 +317,10 @@ public class PartitionPathUtils {
int expectLevel,
List<FileStatus> results)
throws IOException {
+ if (isHiddenFile(fileStatus.getPath())) {
+ return;
+ }
+
if (expectLevel == level) {
results.add(fileStatus);
return;
@@ -327,7 +334,11 @@ public class PartitionPathUtils {
}
private static boolean isHiddenFile(FileStatus fileStatus) {
- String name = fileStatus.getPath().getName();
+ return isHiddenFile(fileStatus.getPath());
+ }
+
+ private static boolean isHiddenFile(Path path) {
+ String name = path.getName();
return name.startsWith("_") || name.startsWith(".");
}
}
diff --git a/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java b/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java
index 5bf13d9209..ac49164ea8 100644
--- a/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java
@@ -161,7 +161,7 @@ public class FormatTableScanTest {
partitionType,
enablePartitionValueOnly);
- // Should optimize to specific partition path for first key
+ // Should not be optimized because of greater than
assertThat(result.getLeft()).isEqualTo(tableLocation);
assertThat(result.getRight()).isEqualTo(2);
@@ -202,6 +202,7 @@ public class FormatTableScanTest {
partitionType,
enablePartitionValueOnly);
String partitionPath = enablePartitionValueOnly ? "2023/12" :
"year=2023/month=12";
+
// Should optimize to specific partition path
assertThat(result.getLeft().toString()).isEqualTo(tableLocation +
partitionPath);
assertThat(result.getRight()).isEqualTo(0);
@@ -265,6 +266,82 @@ public class FormatTableScanTest {
assertThat(searched.size()).isEqualTo(1);
}
+ @TestTemplate
+ void testNoOptimizationWithSecondEquality() throws IOException {
+ Path tableLocation = new Path(tmpPath.toUri());
+ // Create equality predicate for only the second partition key
+ PredicateBuilder builder = new PredicateBuilder(partitionType);
+ Predicate predicate =
+ PredicateBuilder.and(builder.greaterOrEqual(0, 2023),
builder.equal(1, 12));
+ PartitionPredicate partitionFilter =
+ PartitionPredicate.fromPredicate(partitionType, predicate);
+
+ Pair<Path, Integer> result =
+ FormatTableScan.computeScanPathAndLevel(
+ tableLocation,
+ partitionKeys,
+ partitionFilter,
+ partitionType,
+ enablePartitionValueOnly);
+
+ // Should not optimize with second equality filter
+ assertThat(result.getLeft()).isEqualTo(tableLocation);
+ assertThat(result.getRight()).isEqualTo(2);
+
+ // test searchPartSpecAndPaths
+ LocalFileIO fileIO = LocalFileIO.create();
+ String partitionPath = enablePartitionValueOnly ? "2023/12" :
"year=2023/month=12";
+ fileIO.mkdirs(new Path(tableLocation, partitionPath));
+ List<Pair<LinkedHashMap<String, String>, Path>> searched =
+ searchPartSpecAndPaths(
+ fileIO,
+ result.getLeft(),
+ result.getRight(),
+ partitionKeys,
+ enablePartitionValueOnly);
+ LinkedHashMap<String, String> expectPartitionSpec =
+ new LinkedHashMap<>(partitionKeys.size());
+ expectPartitionSpec.put("year", "2023");
+ expectPartitionSpec.put("month", "12");
+ assertThat(searched.get(0).getLeft()).isEqualTo(expectPartitionSpec);
+ assertThat(searched.size()).isEqualTo(1);
+ }
+
+ @TestTemplate
+ void testSkipIllegalPath() throws IOException {
+ Path tableLocation = new Path(tmpPath.toUri());
+ PartitionPredicate partitionFilter =
PartitionPredicate.fromPredicate(partitionType, null);
+ Pair<Path, Integer> result =
+ FormatTableScan.computeScanPathAndLevel(
+ tableLocation,
+ partitionKeys,
+ partitionFilter,
+ partitionType,
+ enablePartitionValueOnly);
+
+ LocalFileIO fileIO = LocalFileIO.create();
+ String illegalPath =
+ enablePartitionValueOnly
+ ? "_unknown-year/unknown-month"
+ : "unknown-year/unknown-month";
+ fileIO.mkdirs(new Path(tableLocation, illegalPath));
+ String partitionPath = enablePartitionValueOnly ? "2023/12" :
"year=2023/month=12";
+ fileIO.mkdirs(new Path(tableLocation, partitionPath));
+ List<Pair<LinkedHashMap<String, String>, Path>> searched =
+ searchPartSpecAndPaths(
+ fileIO,
+ result.getLeft(),
+ result.getRight(),
+ partitionKeys,
+ enablePartitionValueOnly);
+ LinkedHashMap<String, String> expectPartitionSpec =
+ new LinkedHashMap<>(partitionKeys.size());
+ expectPartitionSpec.put("year", "2023");
+ expectPartitionSpec.put("month", "12");
+ assertThat(searched.get(0).getLeft()).isEqualTo(expectPartitionSpec);
+ assertThat(searched.size()).isEqualTo(1);
+ }
+
@TestTemplate
void testComputeScanPathAndLevel() {
Path tableLocation = new Path(tmpPath.toUri());