This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 5ea57efd6f [core] optimize partition filter for format table (#6263)
5ea57efd6f is described below
commit 5ea57efd6f724a96a48730f486af20a0d590ce0a
Author: jerry <[email protected]>
AuthorDate: Fri Sep 19 16:03:23 2025 +0800
[core] optimize partition filter for format table (#6263)
---
.../predicate/OnlyPartitionKeyEqualVisitor.java | 7 +
.../paimon/partition/PartitionPredicate.java | 29 +++
.../paimon/table/format/FormatTableScan.java | 43 +++-
.../paimon/partition/PartitionPredicateTest.java | 155 ++++++++++++
.../paimon/table/format/FormatTableScanTest.java | 266 ++++++++++++++++++++-
5 files changed, 493 insertions(+), 7 deletions(-)
diff --git
a/paimon-common/src/main/java/org/apache/paimon/predicate/OnlyPartitionKeyEqualVisitor.java
b/paimon-common/src/main/java/org/apache/paimon/predicate/OnlyPartitionKeyEqualVisitor.java
index 1eda670db8..e446897d4b 100644
---
a/paimon-common/src/main/java/org/apache/paimon/predicate/OnlyPartitionKeyEqualVisitor.java
+++
b/paimon-common/src/main/java/org/apache/paimon/predicate/OnlyPartitionKeyEqualVisitor.java
@@ -33,6 +33,8 @@ public class OnlyPartitionKeyEqualVisitor implements
FunctionVisitor<Boolean> {
private final Map<String, String> partitions;
+ private boolean hasOrCondition = false;
+
public OnlyPartitionKeyEqualVisitor(List<String> partitionKeys) {
this.partitionKeys = partitionKeys;
partitions = new HashMap<>();
@@ -42,6 +44,10 @@ public class OnlyPartitionKeyEqualVisitor implements
FunctionVisitor<Boolean> {
return partitions;
}
+ public boolean hasOrCondition() { // true once visitOr has run on this visitor instance
+ return hasOrCondition;
+ }
+
@Override
public Boolean visitIsNotNull(FieldRef fieldRef) {
return false;
@@ -119,6 +125,7 @@ public class OnlyPartitionKeyEqualVisitor implements
FunctionVisitor<Boolean> {
@Override
public Boolean visitOr(List<Boolean> children) {
+ hasOrCondition = true; // record the OR so callers can detect it through hasOrCondition()
return false; // the children's results are ignored: an OR always yields false
}
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/partition/PartitionPredicate.java
b/paimon-core/src/main/java/org/apache/paimon/partition/PartitionPredicate.java
index 7e2369e335..9154fefcb5 100644
---
a/paimon-core/src/main/java/org/apache/paimon/partition/PartitionPredicate.java
+++
b/paimon-core/src/main/java/org/apache/paimon/partition/PartitionPredicate.java
@@ -26,6 +26,7 @@ import
org.apache.paimon.data.serializer.InternalRowSerializer;
import org.apache.paimon.data.serializer.InternalSerializers;
import org.apache.paimon.data.serializer.Serializer;
import org.apache.paimon.format.SimpleColStats;
+import org.apache.paimon.predicate.OnlyPartitionKeyEqualVisitor;
import org.apache.paimon.predicate.Predicate;
import org.apache.paimon.predicate.PredicateBuilder;
import org.apache.paimon.statistics.FullSimpleColStatsCollector;
@@ -138,6 +139,11 @@ public interface PartitionPredicate extends Serializable {
};
}
+    /**
+     * Extracts the equality constraints on the leading partition keys of this predicate.
+     *
+     * <p>This default implementation extracts nothing and returns {@code null}; callers must
+     * treat {@code null} as "no partition spec available" and keep scanning from the table root.
+     *
+     * @param partitionKeys partition key names, in partition order
+     * @return partition key to value (as string) for the leading keys bound by equality, or
+     *     {@code null} when no spec can be derived
+     */
+    @Nullable
+    default Map<String, String> extractLeadingEqualityPartitionSpecWhenOnlyAnd(
+            List<String> partitionKeys) {
+        return null;
+    }
+
/** A {@link PartitionPredicate} using {@link Predicate}. */
class DefaultPartitionPredicate implements PartitionPredicate {
@@ -162,6 +168,29 @@ public interface PartitionPredicate extends Serializable {
InternalArray nullCounts) {
return predicate.test(rowCount, minValues, maxValues, nullCounts);
}
+
+        /**
+         * Extracts the equality constraints on the leading partition keys of the wrapped
+         * predicate.
+         *
+         * <p>The result always follows {@code partitionKeys} order and stops at the first key
+         * that has no equality constraint, so it can be turned into a directory prefix. A key
+         * after a gap is never included, even when the whole predicate consists of equalities
+         * only (for example {@code month = 12} on keys {@code year, month}).
+         *
+         * @param partitionKeys partition key names, in partition order
+         * @return a new map for the leading run of equality-bound keys (possibly empty), or
+         *     {@code null} when the predicate contains an OR
+         */
+        @Override
+        @Nullable
+        public Map<String, String> extractLeadingEqualityPartitionSpecWhenOnlyAnd(
+                List<String> partitionKeys) {
+            OnlyPartitionKeyEqualVisitor visitor = new OnlyPartitionKeyEqualVisitor(partitionKeys);
+            // Only what the visitor collected matters here: its boolean result cannot tell
+            // whether the collected keys form a leading prefix, so it is deliberately ignored.
+            predicate.visit(visitor);
+            if (visitor.hasOrCondition()) {
+                return null;
+            }
+            Map<String, String> collected = visitor.partitions();
+            // Copy into a fresh map so the visitor's internal state is never handed out.
+            Map<String, String> leadingSpec = new HashMap<>(partitionKeys.size());
+            for (String partitionKey : partitionKeys) {
+                if (!collected.containsKey(partitionKey)) {
+                    // First key without an equality constraint ends the usable prefix.
+                    break;
+                }
+                leadingSpec.put(partitionKey, collected.get(partitionKey));
+            }
+            return leadingSpec;
+        }
}
/**
diff --git
a/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java
b/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java
index 1850bc4c5c..73d9d8bd12 100644
---
a/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java
+++
b/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java
@@ -43,6 +43,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
+import java.util.Map;
/** {@link TableScan} for {@link FormatTable}. */
public class FormatTableScan implements InnerTableScan {
@@ -133,11 +134,17 @@ public class FormatTableScan implements InnerTableScan {
try {
FileIO fileIO = table.fileIO();
if (!table.partitionKeys().isEmpty()) {
+ Pair<Path, PartitionPredicate> scanPath2PartitionFilter =
+ getScanPathAndPartitionFilter(
+ new Path(table.location()),
+ table.partitionKeys(),
+ partitionFilter,
+ table.partitionType());
+ Path scanPath = scanPath2PartitionFilter.getLeft();
+ PartitionPredicate partitionFilter =
scanPath2PartitionFilter.getRight();
List<Pair<LinkedHashMap<String, String>, Path>>
partition2Paths =
PartitionPathUtils.searchPartSpecAndPaths(
- fileIO,
- new Path(table.location()),
- table.partitionKeys().size());
+ fileIO, scanPath,
table.partitionKeys().size());
for (Pair<LinkedHashMap<String, String>, Path>
partition2Path :
partition2Paths) {
LinkedHashMap<String, String> partitionSpec =
partition2Path.getKey();
@@ -165,6 +172,36 @@ public class FormatTableScan implements InnerTableScan {
}
}
+    /**
+     * Narrows the directory to scan using the equality constraints on leading partition keys.
+     *
+     * <p>Without partition keys, without a filter, or when the filter yields no leading
+     * equality spec, the table location and the filter are returned unchanged. Otherwise the
+     * returned path is the table location extended by the partition path of the spec. The
+     * filter is replaced by {@code null} only when the spec has one entry per partition key.
+     *
+     * <p>NOTE(review): dropping the filter presumably assumes the predicate holds no conjunct
+     * beyond those equalities - confirm. Callers that list partition directories below the
+     * returned path likely need to account for the levels it already consumed - verify.
+     *
+     * @param tableLocation root directory of the table
+     * @param partitionKeys partition key names, in partition order
+     * @param partitionFilter partition filter, may be {@code null}
+     * @param partitionType row type of the partition columns
+     * @return pair of (path to scan, filter still to apply or {@code null})
+     */
+    protected static Pair<Path, PartitionPredicate> getScanPathAndPartitionFilter(
+            Path tableLocation,
+            List<String> partitionKeys,
+            PartitionPredicate partitionFilter,
+            RowType partitionType) {
+        // Nothing to narrow without partition keys or without a filter.
+        if (partitionKeys.isEmpty() || partitionFilter == null) {
+            return Pair.of(tableLocation, partitionFilter);
+        }
+
+        Map<String, String> leadingSpec =
+                partitionFilter.extractLeadingEqualityPartitionSpecWhenOnlyAnd(partitionKeys);
+        if (leadingSpec == null || leadingSpec.isEmpty()) {
+            return Pair.of(tableLocation, partitionFilter);
+        }
+
+        // Scan only below the directory addressed by the leading equality values.
+        String relativePath = PartitionPathUtils.generatePartitionPath(leadingSpec, partitionType);
+        Path narrowedPath = new Path(tableLocation, relativePath);
+
+        // Every partition key is pinned: no further partition filtering is applied.
+        boolean coversAllKeys = leadingSpec.size() == partitionKeys.size();
+        PartitionPredicate remainingFilter = coversAllKeys ? null : partitionFilter;
+        return Pair.of(narrowedPath, remainingFilter);
+    }
+
private List<Split> getSplits(FileIO fileIO, Path path, BinaryRow
partition)
throws IOException {
List<Split> splits = new ArrayList<>();
diff --git
a/paimon-core/src/test/java/org/apache/paimon/partition/PartitionPredicateTest.java
b/paimon-core/src/test/java/org/apache/paimon/partition/PartitionPredicateTest.java
index 0abf7f76f9..8d95768059 100644
---
a/paimon-core/src/test/java/org/apache/paimon/partition/PartitionPredicateTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/partition/PartitionPredicateTest.java
@@ -32,6 +32,8 @@ import org.junit.jupiter.api.Test;
import java.util.Arrays;
import java.util.Collections;
+import java.util.List;
+import java.util.Map;
import static org.apache.paimon.data.BinaryRow.EMPTY_ROW;
import static org.apache.paimon.predicate.PredicateBuilder.and;
@@ -170,4 +172,157 @@ public class PartitionPredicateTest {
writer.complete();
return row;
}
+
+ @Test
+ public void testExtractEqualityPartitionSpecWithAllEqualityWhenAllIsAnd() {
+ RowType type =
+ RowType.builder()
+ .field("year", DataTypes.INT())
+ .field("month", DataTypes.INT())
+ .field("day", DataTypes.INT())
+ .build();
+ List<String> partitionKeys = Arrays.asList("year", "month", "day");
+
+ // Predicate: year = 2023 AND month = 12 AND day = 25 (every partition key bound by equality, so all three are expected)
+ PredicateBuilder builder = new PredicateBuilder(type);
+ Predicate equalityPredicate =
+ PredicateBuilder.and(
+ PredicateBuilder.and(builder.equal(0, 2023),
builder.equal(1, 12)),
+ builder.equal(2, 25));
+ PartitionPredicate partitionPredicate =
+ PartitionPredicate.fromPredicate(type, equalityPredicate);
+
+ Map<String, String> result =
+
partitionPredicate.extractLeadingEqualityPartitionSpecWhenOnlyAnd(partitionKeys);
+
+ assertThat(result).isNotNull();
+ assertThat(result).hasSize(3);
+ assertThat(result.get("year")).isEqualTo("2023");
+ assertThat(result.get("month")).isEqualTo("12");
+ assertThat(result.get("day")).isEqualTo("25");
+ }
+
+ @Test
+ public void
testExtractEqualityPartitionSpecWithLeadingConsecutiveEqualityWhenAllIsAnd() {
+ RowType type =
+ RowType.builder()
+ .field("year", DataTypes.INT())
+ .field("month", DataTypes.INT())
+ .field("day", DataTypes.INT())
+ .build();
+ List<String> partitionKeys = Arrays.asList("year", "month", "day");
+
+ // Predicate: year = 2023 AND month = 12 AND day > 15 (the equality run ends before day, so only year and month are expected)
+ PredicateBuilder builder = new PredicateBuilder(type);
+ Predicate mixedPredicate =
+ PredicateBuilder.and(
+ PredicateBuilder.and(builder.equal(0, 2023),
builder.equal(1, 12)),
+ builder.greaterThan(2, 15));
+ PartitionPredicate partitionPredicate =
+ PartitionPredicate.fromPredicate(type, mixedPredicate);
+
+ Map<String, String> result =
+
partitionPredicate.extractLeadingEqualityPartitionSpecWhenOnlyAnd(partitionKeys);
+
+ assertThat(result).isNotNull();
+ assertThat(result).hasSize(2);
+ assertThat(result.get("year")).isEqualTo("2023");
+ assertThat(result.get("month")).isEqualTo("12");
+ assertThat(result.containsKey("day")).isFalse();
+ }
+
+ @Test
+ public void
testExtractEqualityPartitionSpecWithFirstPartitionKeyEqualityWhenAllIsAnd() {
+ RowType type =
+ RowType.builder()
+ .field("year", DataTypes.INT())
+ .field("month", DataTypes.INT())
+ .field("day", DataTypes.INT())
+ .build();
+ List<String> partitionKeys = Arrays.asList("year", "month", "day");
+
+ // Predicate: year = 2023 AND month > 6 AND day = 15 (month breaks the run, so day = 15 must not be reported although it is an equality)
+ PredicateBuilder builder = new PredicateBuilder(type);
+ Predicate mixedPredicate =
+ PredicateBuilder.and(
+ PredicateBuilder.and(builder.equal(0, 2023),
builder.greaterThan(1, 6)),
+ builder.equal(2, 15));
+ PartitionPredicate partitionPredicate =
+ PartitionPredicate.fromPredicate(type, mixedPredicate);
+
+ Map<String, String> result =
+
partitionPredicate.extractLeadingEqualityPartitionSpecWhenOnlyAnd(partitionKeys);
+ assertThat(result).isNotNull();
+ assertThat(result).hasSize(1);
+ assertThat(result.get("year")).isEqualTo("2023");
+ assertThat(result.containsKey("month")).isFalse();
+ assertThat(result.containsKey("day")).isFalse();
+ }
+
+ @Test
+ public void
testExtractEqualityPartitionSpecWithNoLeadingEqualityWhenAllIsAnd() {
+ RowType type =
+ RowType.builder()
+ .field("year", DataTypes.INT())
+ .field("month", DataTypes.INT())
+ .field("day", DataTypes.INT())
+ .build();
+ List<String> partitionKeys = Arrays.asList("year", "month", "day");
+
+ // Predicate: year > 2020 AND month = 12 AND day = 15 (the first key has no equality, so an empty, non-null spec is expected)
+ PredicateBuilder builder = new PredicateBuilder(type);
+ Predicate mixedPredicate =
+ PredicateBuilder.and(
+ PredicateBuilder.and(builder.greaterThan(0, 2020),
builder.equal(1, 12)),
+ builder.equal(2, 15));
+ PartitionPredicate partitionPredicate =
+ PartitionPredicate.fromPredicate(type, mixedPredicate);
+
+ Map<String, String> result =
+
partitionPredicate.extractLeadingEqualityPartitionSpecWhenOnlyAnd(partitionKeys);
+
+ assertThat(result).isEmpty();
+ }
+
+ @Test
+ public void
testExtractEqualityPartitionSpecWithNonEqualityPredicateWhenAllIsAnd() {
+ RowType type =
+ RowType.builder()
+ .field("year", DataTypes.INT())
+ .field("month", DataTypes.INT())
+ .build();
+ List<String> partitionKeys = Arrays.asList("year", "month");
+
+ // Predicate: year > 2020 AND month > 6 (no equality at all, so an empty spec is expected)
+ PredicateBuilder builder = new PredicateBuilder(type);
+ Predicate nonEqualityPredicate =
+ PredicateBuilder.and(builder.greaterThan(0, 2020),
builder.greaterThan(1, 6));
+ PartitionPredicate partitionPredicate =
+ PartitionPredicate.fromPredicate(type, nonEqualityPredicate);
+
+ Map<String, String> result =
+
partitionPredicate.extractLeadingEqualityPartitionSpecWhenOnlyAnd(partitionKeys);
+
+ assertThat(result).isEmpty();
+ }
+
+ @Test
+ public void
testExtractLeadingEqualityPartitionSpecWhenOnlyAndWithOrPredicate() {
+ RowType type =
+ RowType.builder()
+ .field("year", DataTypes.INT())
+ .field("month", DataTypes.INT())
+ .build();
+ List<String> partitionKeys = Arrays.asList("year", "month");
+
+ // Predicate: year = 2023 OR year = 2024 (any OR makes extraction give up, so null rather than an empty map is expected)
+ PredicateBuilder builder = new PredicateBuilder(type);
+ Predicate orPredicate = PredicateBuilder.or(builder.equal(0, 2023),
builder.equal(0, 2024));
+ PartitionPredicate partitionPredicate =
PartitionPredicate.fromPredicate(type, orPredicate);
+
+ Map<String, String> result =
+
partitionPredicate.extractLeadingEqualityPartitionSpecWhenOnlyAnd(partitionKeys);
+
+ assertThat(result).isNull();
+ }
}
diff --git
a/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java
b/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java
index 326a9138f6..200374b79c 100644
---
a/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java
@@ -18,11 +18,23 @@
package org.apache.paimon.table.format;
-import org.junit.jupiter.api.DisplayName;
+import org.apache.paimon.fs.Path;
+import org.apache.paimon.partition.PartitionPredicate;
+import org.apache.paimon.predicate.Predicate;
+import org.apache.paimon.predicate.PredicateBuilder;
+import org.apache.paimon.types.DataTypes;
+import org.apache.paimon.types.RowType;
+import org.apache.paimon.utils.Pair;
+
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -42,7 +54,6 @@ class FormatTableScanTest {
"9test",
"Test_file.log"
})
- @DisplayName("Test valid filenames that should return true")
void testValidDataFileNames(String fileName) {
assertTrue(
FormatTableScan.isDataFileName(fileName),
@@ -51,7 +62,6 @@ class FormatTableScanTest {
@ParameterizedTest
@ValueSource(strings = {".hidden", "_file.txt"})
- @DisplayName("Test invalid filenames that should return false")
void testInvalidDataFileNames(String fileName) {
assertFalse(
FormatTableScan.isDataFileName(fileName),
@@ -59,8 +69,256 @@ class FormatTableScanTest {
}
@Test
- @DisplayName("Test null input should return false")
void testNullInput() {
assertFalse(FormatTableScan.isDataFileName(null), "Null input should
return false");
}
+
+ @Test
+ void testGetScanPathAndPartitionFilterNoPartitionKeys() {
+ Path tableLocation = new Path("/test/table");
+ List<String> partitionKeys = Collections.emptyList(); // unpartitioned table: nothing to narrow
+ RowType partitionType = RowType.of();
+ PartitionPredicate partitionFilter = PartitionPredicate.alwaysTrue();
+
+ Pair<Path, PartitionPredicate> result =
+ FormatTableScan.getScanPathAndPartitionFilter(
+ tableLocation, partitionKeys, partitionFilter,
partitionType);
+
+ assertThat(result.getLeft()).isEqualTo(tableLocation); // scan path stays at the table root
+ assertThat(result.getRight()).isEqualTo(partitionFilter); // filter is handed back untouched
+ }
+
+ @Test
+ void testGetScanPathAndPartitionFilterNullFilter() {
+ Path tableLocation = new Path("/test/table");
+ List<String> partitionKeys = Arrays.asList("year", "month");
+ RowType partitionType =
+ RowType.builder()
+ .field("year", DataTypes.INT())
+ .field("month", DataTypes.INT())
+ .build();
+
+ Pair<Path, PartitionPredicate> result =
+ FormatTableScan.getScanPathAndPartitionFilter(
+ tableLocation, partitionKeys, null, partitionType);
+
+ assertThat(result.getLeft()).isEqualTo(tableLocation); // a null filter gives nothing to narrow by
+ assertThat(result.getRight()).isNull(); // the null filter is passed through
+ }
+
+ @Test
+ void testGetScanPathAndPartitionFilterWithEqualityFilter() {
+ Path tableLocation = new Path("/test/table");
+ List<String> partitionKeys = Arrays.asList("year", "month");
+ RowType partitionType =
+ RowType.builder()
+ .field("year", DataTypes.INT())
+ .field("month", DataTypes.INT())
+ .build();
+
+ // Predicate: year = 2023 AND month = 12 (equality on every partition key)
+ PredicateBuilder builder = new PredicateBuilder(partitionType);
+ Predicate equalityPredicate =
+ PredicateBuilder.and(builder.equal(0, 2023), builder.equal(1,
12));
+ PartitionPredicate partitionFilter =
+ PartitionPredicate.fromPredicate(partitionType,
equalityPredicate);
+
+ Pair<Path, PartitionPredicate> result =
+ FormatTableScan.getScanPathAndPartitionFilter(
+ tableLocation, partitionKeys, partitionFilter,
partitionType);
+
+ // Expect the full partition directory as scan path and a null filter, since every key is pinned
+
assertThat(result.getLeft().toString()).isEqualTo("/test/table/year=2023/month=12");
+ assertThat(result.getRight()).isNull();
+ }
+
+ @Test
+ void
testGetScanPathAndPartitionFilterWithFirstPartitionKeyEqualityFilter() {
+ Path tableLocation = new Path("/test/table");
+ List<String> partitionKeys = Arrays.asList("year", "month");
+ RowType partitionType =
+ RowType.builder()
+ .field("year", DataTypes.INT())
+ .field("month", DataTypes.INT())
+ .build();
+ // Predicate: year = 2023 (equality on the first of the two partition keys only)
+ PredicateBuilder builder = new PredicateBuilder(partitionType);
+ Predicate firstKeyEqualityPredicate = builder.equal(0, 2023);
+ PartitionPredicate partitionFilter =
+ PartitionPredicate.fromPredicate(partitionType,
firstKeyEqualityPredicate);
+
+ Pair<Path, PartitionPredicate> result =
+ FormatTableScan.getScanPathAndPartitionFilter(
+ tableLocation, partitionKeys, partitionFilter,
partitionType);
+
+ // Expect the scan path narrowed to year=2023 while the filter is kept, because month is not pinned
+
assertThat(result.getLeft().toString()).isEqualTo("/test/table/year=2023");
+ assertThat(result.getRight()).isEqualTo(partitionFilter);
+ }
+
+ @Test
+ void testGetScanPathAndPartitionFilter() {
+ Path tableLocation = new Path("/test/table");
+ List<String> partitionKeys = Arrays.asList("year", "month");
+ RowType partitionType =
+ RowType.builder()
+ .field("year", DataTypes.INT())
+ .field("month", DataTypes.INT())
+ .build();
+
+ // Predicate: year > 2022 (this test covers the non-equality case: no key is bound by equality)
+ PredicateBuilder builder = new PredicateBuilder(partitionType);
+ Predicate nonEqualityPredicate = builder.greaterThan(0, 2022);
+ PartitionPredicate partitionFilter =
+ PartitionPredicate.fromPredicate(partitionType,
nonEqualityPredicate);
+
+ Pair<Path, PartitionPredicate> result =
+ FormatTableScan.getScanPathAndPartitionFilter(
+ tableLocation, partitionKeys, partitionFilter,
partitionType);
+
+ // Expect no narrowing: the table root and the original filter come back unchanged
+ assertThat(result.getLeft()).isEqualTo(tableLocation);
+ assertThat(result.getRight()).isEqualTo(partitionFilter);
+ }
+
+ @Test
+ void testGetScanPathAndPartitionFilterWithOrPredicate() {
+ Path tableLocation = new Path("/test/table");
+ List<String> partitionKeys = Arrays.asList("year", "month");
+ RowType partitionType =
+ RowType.builder()
+ .field("year", DataTypes.INT())
+ .field("month", DataTypes.INT())
+ .build();
+
+ // Predicate: year = 2023 OR year = 2024 (an OR cannot be mapped to a single directory prefix)
+ PredicateBuilder builder = new PredicateBuilder(partitionType);
+ Predicate orPredicate = PredicateBuilder.or(builder.equal(0, 2023),
builder.equal(0, 2024));
+ PartitionPredicate partitionFilter =
+ PartitionPredicate.fromPredicate(partitionType, orPredicate);
+
+ Pair<Path, PartitionPredicate> result =
+ FormatTableScan.getScanPathAndPartitionFilter(
+ tableLocation, partitionKeys, partitionFilter,
partitionType);
+
+ // Expect no narrowing: the table root and the original filter come back unchanged
+ assertThat(result.getLeft()).isEqualTo(tableLocation);
+ assertThat(result.getRight()).isEqualTo(partitionFilter);
+ }
+
+ @Test
+ void testExtractEqualityPartitionSpecWithLeadingConsecutiveEquality() {
+ List<String> partitionKeys = Arrays.asList("year", "month", "day");
+ RowType partitionType =
+ RowType.builder()
+ .field("year", DataTypes.INT())
+ .field("month", DataTypes.INT())
+ .field("day", DataTypes.INT())
+ .build();
+
+ // Predicate: year = 2023 AND month = 12 AND day > 15 (two leading equalities; the filter must survive to evaluate day > 15)
+ PredicateBuilder builder = new PredicateBuilder(partitionType);
+ Predicate mixedPredicate =
+ PredicateBuilder.and(
+ PredicateBuilder.and(builder.equal(0, 2023),
builder.equal(1, 12)),
+ builder.greaterThan(2, 15));
+ PartitionPredicate partitionFilter =
+ PartitionPredicate.fromPredicate(partitionType,
mixedPredicate);
+
+ Path tableLocation = new Path("/test/table");
+ Pair<Path, PartitionPredicate> result =
+ FormatTableScan.getScanPathAndPartitionFilter(
+ tableLocation, partitionKeys, partitionFilter,
partitionType);
+
+ // Should optimize to year and month path (leading consecutive
equality)
+
assertThat(result.getLeft().toString()).isEqualTo("/test/table/year=2023/month=12");
+ assertThat(result.getRight()).isEqualTo(partitionFilter);
+ }
+
+ @Test
+ void testExtractEqualityPartitionSpecWithNonConsecutiveEquality() {
+ List<String> partitionKeys = Arrays.asList("year", "month", "day");
+ RowType partitionType =
+ RowType.builder()
+ .field("year", DataTypes.INT())
+ .field("month", DataTypes.INT())
+ .field("day", DataTypes.INT())
+ .build();
+
+ // Predicate: year = 2023 AND month > 6 AND day = 15 (day = 15 sits after the gap at month and must not reach the path)
+ PredicateBuilder builder = new PredicateBuilder(partitionType);
+ Predicate mixedPredicate =
+ PredicateBuilder.and(
+ PredicateBuilder.and(builder.equal(0, 2023),
builder.greaterThan(1, 6)),
+ builder.equal(2, 15));
+ PartitionPredicate partitionFilter =
+ PartitionPredicate.fromPredicate(partitionType,
mixedPredicate);
+
+ Path tableLocation = new Path("/test/table");
+ Pair<Path, PartitionPredicate> result =
+ FormatTableScan.getScanPathAndPartitionFilter(
+ tableLocation, partitionKeys, partitionFilter,
partitionType);
+
+ // Should optimize only to year path (first equality, then stop at
non-equality)
+
assertThat(result.getLeft().toString()).isEqualTo("/test/table/year=2023");
+ assertThat(result.getRight()).isEqualTo(partitionFilter);
+ }
+
+ @Test
+ void testExtractEqualityPartitionSpecWithSecondPartitionKeyEqualityOnly() {
+ List<String> partitionKeys = Arrays.asList("year", "month", "day");
+ RowType partitionType =
+ RowType.builder()
+ .field("year", DataTypes.INT())
+ .field("month", DataTypes.INT())
+ .field("day", DataTypes.INT())
+ .build();
+
+ // Predicate: year > 2020 AND month = 12 AND day = 15 (equalities exist, but none on the first partition key)
+ PredicateBuilder builder = new PredicateBuilder(partitionType);
+ Predicate mixedPredicate =
+ PredicateBuilder.and(
+ PredicateBuilder.and(builder.greaterThan(0, 2020),
builder.equal(1, 12)),
+ builder.equal(2, 15));
+ PartitionPredicate partitionFilter =
+ PartitionPredicate.fromPredicate(partitionType,
mixedPredicate);
+
+ Path tableLocation = new Path("/test/table");
+ Pair<Path, PartitionPredicate> result =
+ FormatTableScan.getScanPathAndPartitionFilter(
+ tableLocation, partitionKeys, partitionFilter,
partitionType);
+
+ // Expect no narrowing because the first partition key is not bound by equality; path and filter stay as given
+ assertThat(result.getLeft()).isEqualTo(tableLocation);
+ assertThat(result.getRight()).isEqualTo(partitionFilter);
+ }
+
+ @Test
+ void testExtractEqualityPartitionSpecWithAllEqualityConditions() {
+ List<String> partitionKeys = Arrays.asList("year", "month", "day");
+ RowType partitionType =
+ RowType.builder()
+ .field("year", DataTypes.INT())
+ .field("month", DataTypes.INT())
+ .field("day", DataTypes.INT())
+ .build();
+
+ // Predicate: year = 2023 AND month = 12 AND day = 25 (all three partition keys pinned by equality)
+ PredicateBuilder builder = new PredicateBuilder(partitionType);
+ Predicate allEqualityPredicate =
+ PredicateBuilder.and(
+ PredicateBuilder.and(builder.equal(0, 2023),
builder.equal(1, 12)),
+ builder.equal(2, 25));
+ PartitionPredicate partitionFilter =
+ PartitionPredicate.fromPredicate(partitionType,
allEqualityPredicate);
+
+ Path tableLocation = new Path("/test/table");
+ Pair<Path, PartitionPredicate> result =
+ FormatTableScan.getScanPathAndPartitionFilter(
+ tableLocation, partitionKeys, partitionFilter,
partitionType);
+
+ // Should optimize to full partition path and no further filtering
needed
+
assertThat(result.getLeft().toString()).isEqualTo("/test/table/year=2023/month=12/day=25");
+ assertThat(result.getRight()).isNull();
+ }
}