This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new bead1aabef [core] support whether only partition value in path for format table (#6300)
bead1aabef is described below
commit bead1aabefe514aef79c562e550d55199f022368
Author: jerry <[email protected]>
AuthorDate: Fri Sep 26 15:00:34 2025 +0800
[core] support whether only partition value in path for format table (#6300)
---
.../shortcodes/generated/core_configuration.html | 6 +
.../main/java/org/apache/paimon/CoreOptions.java | 10 +
.../paimon/table/format/FormatTableScan.java | 37 ++-
.../apache/paimon/utils/PartitionPathUtils.java | 49 +++-
.../org/apache/paimon/catalog/CatalogTestBase.java | 66 ++++++
.../paimon/table/format/FormatTableScanTest.java | 256 +++++++++++----------
.../utils/PartitionStatisticsReporterTest.java | 3 +-
.../java/org/apache/paimon/hive/HiveCatalog.java | 3 +-
8 files changed, 295 insertions(+), 135 deletions(-)
diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html
index 2ca6b6c7bc..9c243fa163 100644
--- a/docs/layouts/shortcodes/generated/core_configuration.html
+++ b/docs/layouts/shortcodes/generated/core_configuration.html
@@ -506,6 +506,12 @@ under the License.
<td>Boolean</td>
<td>Whether to force the use of lookup for compaction.</td>
</tr>
+ <tr>
+ <td><h5>format-table.partition-path-only-value</h5></td>
+ <td style="word-wrap: break-word;">false</td>
+ <td>Boolean</td>
+ <td>Format table file path only contain partition value.</td>
+ </tr>
<tr>
<td><h5>full-compaction.delta-commits</h5></td>
<td style="word-wrap: break-word;">(none)</td>
diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java
index ad7bb024f2..e0a5b5fcb6 100644
--- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java
+++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java
@@ -1971,6 +1971,12 @@ public class CoreOptions implements Serializable {
.defaultValue(1024)
.withDescription("Threshold for merging records to binary
buffer in lookup.");
+ public static final ConfigOption<Boolean>
FORMAT_TABLE_PARTITION_ONLY_VALUE_IN_PATH =
+ ConfigOptions.key("format-table.partition-path-only-value")
+ .booleanType()
+ .defaultValue(false)
+ .withDescription("Format table file path only contain
partition value.");
+
private final Options options;
public CoreOptions(Map<String, String> options) {
@@ -3029,6 +3035,10 @@ public class CoreOptions implements Serializable {
return options.get(LOOKUP_MERGE_RECORDS_THRESHOLD);
}
+ public boolean formatTablePartitionOnlyValueInPath() {
+ return options.get(FORMAT_TABLE_PARTITION_ONLY_VALUE_IN_PATH);
+ }
+
/** Specifies the merge engine for table with primary key. */
public enum MergeEngine implements DescribedEnum {
DEDUPLICATE("deduplicate", "De-duplicate and keep the last row."),
diff --git a/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java b/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java
index e5aadd69e2..f9a3f0067c 100644
--- a/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java
+++ b/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java
@@ -18,6 +18,7 @@
package org.apache.paimon.table.format;
+import org.apache.paimon.CoreOptions;
import org.apache.paimon.data.BinaryRow;
import org.apache.paimon.data.GenericRow;
import org.apache.paimon.data.serializer.InternalRowSerializer;
@@ -59,6 +60,7 @@ import static
org.apache.paimon.utils.PartitionPathUtils.searchPartSpecAndPaths;
public class FormatTableScan implements InnerTableScan {
private final FormatTable table;
+ private final CoreOptions coreOptions;
@Nullable private PartitionPredicate partitionFilter;
@Nullable private final Integer limit;
@@ -67,6 +69,7 @@ public class FormatTableScan implements InnerTableScan {
@Nullable PartitionPredicate partitionFilter,
@Nullable Integer limit) {
this.table = table;
+ this.coreOptions = new CoreOptions(table.options());
this.partitionFilter = partitionFilter;
this.limit = limit;
}
@@ -86,7 +89,11 @@ public class FormatTableScan implements InnerTableScan {
public List<PartitionEntry> listPartitionEntries() {
List<Pair<LinkedHashMap<String, String>, Path>> partition2Paths =
searchPartSpecAndPaths(
- table.fileIO(), new Path(table.location()),
table.partitionKeys().size());
+ table.fileIO(),
+ new Path(table.location()),
+ table.partitionKeys().size(),
+ table.partitionKeys(),
+ coreOptions.formatTablePartitionOnlyValueInPath());
List<PartitionEntry> partitionEntries = new ArrayList<>();
for (Pair<LinkedHashMap<String, String>, Path> partition2Path :
partition2Paths) {
BinaryRow row = toPartitionRow(partition2Path.getKey());
@@ -152,7 +159,8 @@ public class FormatTableScan implements InnerTableScan {
table.partitionType(),
table.defaultPartName(),
new Path(table.location()),
- partitions);
+ partitions,
+ coreOptions.formatTablePartitionOnlyValueInPath());
} else {
// search paths
Pair<Path, Integer> scanPathAndLevel =
@@ -160,10 +168,16 @@ public class FormatTableScan implements InnerTableScan {
new Path(table.location()),
table.partitionKeys(),
partitionFilter,
- table.partitionType());
+ table.partitionType(),
+ coreOptions.formatTablePartitionOnlyValueInPath());
Path scanPath = scanPathAndLevel.getLeft();
int level = scanPathAndLevel.getRight();
- return searchPartSpecAndPaths(table.fileIO(), scanPath, level);
+ return searchPartSpecAndPaths(
+ table.fileIO(),
+ scanPath,
+ level,
+ table.partitionKeys(),
+ coreOptions.formatTablePartitionOnlyValueInPath());
}
}
@@ -172,7 +186,8 @@ public class FormatTableScan implements InnerTableScan {
RowType partitionType,
String defaultPartName,
Path tablePath,
- Set<BinaryRow> partitions) {
+ Set<BinaryRow> partitions,
+ boolean onlyValueInPath) {
InternalRowPartitionComputer partitionComputer =
new InternalRowPartitionComputer(
defaultPartName,
@@ -182,7 +197,11 @@ public class FormatTableScan implements InnerTableScan {
List<Pair<LinkedHashMap<String, String>, Path>> result = new
ArrayList<>();
for (BinaryRow part : partitions) {
LinkedHashMap<String, String> partSpec =
partitionComputer.generatePartValues(part);
- String path = PartitionPathUtils.generatePartitionPath(partSpec);
+
+ String path =
+ onlyValueInPath
+ ?
PartitionPathUtils.generatePartitionPathUtil(partSpec, true)
+ :
PartitionPathUtils.generatePartitionPath(partSpec);
result.add(Pair.of(partSpec, new Path(tablePath, path)));
}
return result;
@@ -192,7 +211,8 @@ public class FormatTableScan implements InnerTableScan {
Path tableLocation,
List<String> partitionKeys,
PartitionPredicate partitionFilter,
- RowType partitionType) {
+ RowType partitionType,
+ boolean onlyValueInPath) {
Path scanPath = tableLocation;
int level = partitionKeys.size();
if (!partitionKeys.isEmpty()) {
@@ -205,7 +225,8 @@ public class FormatTableScan implements InnerTableScan {
if (!equalityPrefix.isEmpty()) {
// Use optimized scan for specific partition path
String partitionPath =
-
PartitionPathUtils.generatePartitionPath(equalityPrefix, partitionType);
+ PartitionPathUtils.generatePartitionPath(
+ equalityPrefix, partitionType,
onlyValueInPath);
scanPath = new Path(tableLocation, partitionPath);
level = partitionKeys.size() - equalityPrefix.size();
}
diff --git a/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java b/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java
index 26aa0d77a2..185479d8a5 100644
--- a/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java
+++ b/paimon-core/src/main/java/org/apache/paimon/utils/PartitionPathUtils.java
@@ -24,6 +24,8 @@ import org.apache.paimon.fs.Path;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.RowType;
+import javax.annotation.Nullable;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
@@ -75,6 +77,11 @@ public class PartitionPathUtils {
* @return An escaped, valid partition name.
*/
public static String generatePartitionPath(LinkedHashMap<String, String>
partitionSpec) {
+ return generatePartitionPathUtil(partitionSpec, false);
+ }
+
+ public static String generatePartitionPathUtil(
+ LinkedHashMap<String, String> partitionSpec, boolean onlyValue) {
if (partitionSpec.isEmpty()) {
return "";
}
@@ -84,8 +91,10 @@ public class PartitionPathUtils {
if (i > 0) {
suffixBuf.append(Path.SEPARATOR);
}
- suffixBuf.append(escapePathName(e.getKey()));
- suffixBuf.append('=');
+ if (!onlyValue) {
+ suffixBuf.append(escapePathName(e.getKey()));
+ suffixBuf.append('=');
+ }
suffixBuf.append(escapePathName(e.getValue()));
i++;
}
@@ -98,12 +107,13 @@ public class PartitionPathUtils {
return partitions.stream()
.map(
partition ->
-
PartitionPathUtils.generatePartitionPath(partition, partitionType))
+ PartitionPathUtils.generatePartitionPath(
+ partition, partitionType, false))
.collect(Collectors.toList());
}
public static String generatePartitionPath(
- Map<String, String> partitionSpec, RowType partitionType) {
+ Map<String, String> partitionSpec, RowType partitionType, boolean
onlyValue) {
LinkedHashMap<String, String> linkedPartitionSpec = new
LinkedHashMap<>();
List<DataField> fields = partitionType.getFields();
@@ -115,7 +125,9 @@ public class PartitionPathUtils {
}
}
- return generatePartitionPath(linkedPartitionSpec);
+ return onlyValue
+ ? generatePartitionPathUtil(linkedPartitionSpec, true)
+ : generatePartitionPath(linkedPartitionSpec);
}
/**
@@ -239,6 +251,17 @@ public class PartitionPathUtils {
return fullPartSpec;
}
+ public static LinkedHashMap<String, String>
extractPartitionSpecFromPathOnlyValue(
+ Path currPath, List<String> partitionKeys, int partitionNumber) {
+ LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<>();
+ String[] split = currPath.toString().split(Path.SEPARATOR);
+ int equalityPartitionSize = partitionKeys.size() - partitionNumber;
+ for (int i = 0; i < equalityPartitionSize; i++) {
+ fullPartSpec.put(partitionKeys.get(i), split[split.length -
partitionKeys.size() + i]);
+ }
+ return fullPartSpec;
+ }
+
/**
* Search all partitions in this path.
*
@@ -247,7 +270,11 @@ public class PartitionPathUtils {
* @return all partition specs to its path.
*/
public static List<Pair<LinkedHashMap<String, String>, Path>>
searchPartSpecAndPaths(
- FileIO fileIO, Path path, int partitionNumber) {
+ FileIO fileIO,
+ Path path,
+ int partitionNumber,
+ @Nullable List<String> partitionKeys,
+ boolean enablePartitionOnlyValueInPath) {
FileStatus[] generatedParts = getFileStatusRecurse(path,
partitionNumber, fileIO);
List<Pair<LinkedHashMap<String, String>, Path>> ret = new
ArrayList<>();
for (FileStatus part : generatedParts) {
@@ -255,7 +282,15 @@ public class PartitionPathUtils {
if (isHiddenFile(part)) {
continue;
}
- ret.add(Pair.of(extractPartitionSpecFromPath(part.getPath()),
part.getPath()));
+ if (enablePartitionOnlyValueInPath && partitionKeys != null) {
+ ret.add(
+ Pair.of(
+ extractPartitionSpecFromPathOnlyValue(
+ part.getPath(), partitionKeys,
partitionNumber),
+ part.getPath()));
+ } else {
+ ret.add(Pair.of(extractPartitionSpecFromPath(part.getPath()),
part.getPath()));
+ }
}
return ret;
}
diff --git a/paimon-core/src/test/java/org/apache/paimon/catalog/CatalogTestBase.java b/paimon-core/src/test/java/org/apache/paimon/catalog/CatalogTestBase.java
index b1ffa865ae..05da64ed49 100644
--- a/paimon-core/src/test/java/org/apache/paimon/catalog/CatalogTestBase.java
+++ b/paimon-core/src/test/java/org/apache/paimon/catalog/CatalogTestBase.java
@@ -584,6 +584,72 @@ public abstract class CatalogTestBase {
.isInstanceOf(RuntimeException.class);
}
+ @Test
+ public void testFormatTableOnlyPartitionValueRead() throws Exception {
+ if (!supportsFormatTable()) {
+ return;
+ }
+ Random random = new Random();
+ String dbName = "test_db";
+ catalog.createDatabase(dbName, true);
+ HadoopCompressionType compressionType = HadoopCompressionType.GZIP;
+ Schema.Builder schemaBuilder = Schema.newBuilder();
+ schemaBuilder.column("f1", DataTypes.INT());
+ schemaBuilder.column("f2", DataTypes.INT());
+ schemaBuilder.column("dt", DataTypes.INT());
+ schemaBuilder.column("dt2", DataTypes.VARCHAR(64));
+ schemaBuilder.partitionKeys("dt", "dt2");
+ schemaBuilder.option("type", "format-table");
+ schemaBuilder.option("file.compression", compressionType.value());
+ schemaBuilder.option("format-table.partition-path-only-value", "true");
+ String[] formats = {"csv", "parquet", "json"};
+ int dtPartitionValue = 10;
+ String dt2PartitionValue = "2022-01-01";
+ for (String format : formats) {
+ Identifier identifier = Identifier.create(dbName,
"partition_table_" + format);
+ schemaBuilder.option("file.format", format);
+ catalog.createTable(identifier, schemaBuilder.build(), true);
+ FormatTable table = (FormatTable) catalog.getTable(identifier);
+ int size = 5;
+ InternalRow[] datas = new InternalRow[size];
+ for (int j = 0; j < size; j++) {
+ datas[j] =
+ GenericRow.of(
+ random.nextInt(),
+ random.nextInt(),
+ dtPartitionValue,
+ BinaryString.fromString(dt2PartitionValue));
+ }
+ FormatWriterFactory factory =
+ (buildFileFormatFactory(format)
+ .create(
+ new
FileFormatFactory.FormatContext(
+ new Options(), 1024,
1024)))
+ .createWriterFactory(table.rowType());
+ Path partitionPath =
+ new Path(
+ String.format(
+ "%s/%s/%s",
+ table.location(), dtPartitionValue,
dt2PartitionValue));
+ DataFilePathFactory dataFilePathFactory =
+ new DataFilePathFactory(
+ partitionPath,
+ format,
+ "data",
+ "change",
+ true,
+ compressionType.value(),
+ null);
+ write(factory, dataFilePathFactory.newPath(),
compressionType.value(), datas);
+ Map<String, String> partitionSpec = new HashMap<>();
+ partitionSpec.put("dt", "" + dtPartitionValue);
+ partitionSpec.put("dt2", dt2PartitionValue);
+ List<InternalRow> readFilterData = read(table, null, null,
partitionSpec, null);
+ assertThat(readFilterData).containsExactlyInAnyOrder(datas);
+ catalog.dropTable(Identifier.create(dbName, format), true);
+ }
+ }
+
@ParameterizedTest
@ValueSource(booleans = {true, false})
public void testFormatTableRead(boolean partitioned) throws Exception {
diff --git a/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java b/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java
index f9a90960b5..87678e1b57 100644
--- a/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/table/format/FormatTableScanTest.java
@@ -26,14 +26,15 @@ import org.apache.paimon.fs.local.LocalFileIO;
import org.apache.paimon.partition.PartitionPredicate;
import org.apache.paimon.predicate.Predicate;
import org.apache.paimon.predicate.PredicateBuilder;
+import
org.apache.paimon.testutils.junit.parameterized.ParameterizedTestExtension;
+import org.apache.paimon.testutils.junit.parameterized.Parameters;
import org.apache.paimon.types.DataTypes;
import org.apache.paimon.types.RowType;
import org.apache.paimon.utils.Pair;
-import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestTemplate;
+import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.io.TempDir;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.ValueSource;
import java.io.IOException;
import java.util.Arrays;
@@ -48,109 +49,106 @@ import static
org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
/** Test for {@link FormatTableScan}. */
-class FormatTableScanTest {
+@ExtendWith(ParameterizedTestExtension.class)
+public class FormatTableScanTest {
@TempDir java.nio.file.Path tmpPath;
- @ParameterizedTest
- @ValueSource(
- strings = {
- "File.txt",
- "file.txt",
- "123file.txt",
- "F",
- "File-1.txt",
- "a",
- "0",
- "9test",
- "Test_file.log"
- })
- void testValidDataFileNames(String fileName) {
- assertTrue(
- FormatTableScan.isDataFileName(fileName),
- "Filename '" + fileName + "' should be valid");
+ private final boolean enablePartitionValueOnly;
+ private final Path defaultTableLocation = new Path("/test/table_scan");
+ private final RowType partitionType =
+ RowType.builder()
+ .field("year", DataTypes.INT())
+ .field("month", DataTypes.INT())
+ .build();
+ private final BinaryRow partition =
+ new
InternalRowSerializer(partitionType).toBinaryRow(GenericRow.of(2023, 2));
+ private final String partitionPath;
+ private final List<String> partitionKeys = partitionType.getFieldNames();
+
+ public FormatTableScanTest(boolean enablePartitionValueOnly, String
partitionPath) {
+ this.enablePartitionValueOnly = enablePartitionValueOnly;
+ this.partitionPath = defaultTableLocation + partitionPath;
}
- @ParameterizedTest
- @ValueSource(strings = {".hidden", "_file.txt"})
- void testInvalidDataFileNames(String fileName) {
- assertFalse(
- FormatTableScan.isDataFileName(fileName),
- "Filename '" + fileName + "' should be invalid");
+ @Parameters(name = "enablePartitionValueOnly={0},partitionPath={1}")
+ public static List<Object[]> parameters() {
+ return Arrays.asList(
+ new Object[] {false, "/year=2023/month=2"}, new Object[]
{true, "/2023/2"});
}
- @Test
+ @TestTemplate
+ void testValidDataFileNames() {
+ // Test valid data file names
+ String[] fileNames = {"File.txt", "file.txt", "123file.txt", "data",
"Test_file.log"};
+ for (String fileName : fileNames) {
+ assertTrue(
+ FormatTableScan.isDataFileName(fileName),
+ "Filename '" + fileName + "' should be valid");
+ }
+ }
+
+ @TestTemplate
+ void testInvalidDataFileNames() {
+ String[] fileNames = {".hidden", "_file.txt"};
+ for (String fileName : fileNames) {
+ assertFalse(
+ FormatTableScan.isDataFileName(fileName),
+ "Filename '" + fileName + "' should be invalid");
+ }
+ }
+
+ @TestTemplate
void testNullInput() {
assertFalse(FormatTableScan.isDataFileName(null), "Null input should
return false");
}
- @Test
+ @TestTemplate
void testComputeScanPathAndLevelNoPartitionKeys() {
- Path tableLocation = new Path("/test/table");
List<String> partitionKeys = Collections.emptyList();
RowType partitionType = RowType.of();
PartitionPredicate partitionFilter = PartitionPredicate.alwaysTrue();
Pair<Path, Integer> result =
FormatTableScan.computeScanPathAndLevel(
- tableLocation, partitionKeys, partitionFilter,
partitionType);
+ defaultTableLocation, partitionKeys, partitionFilter,
partitionType, false);
- assertThat(result.getLeft()).isEqualTo(tableLocation);
+ assertThat(result.getLeft()).isEqualTo(defaultTableLocation);
assertThat(result.getRight()).isEqualTo(0);
}
- @Test
+ @TestTemplate
void testGeneratePartitions() {
- Path tableLocation = new Path("/test/table");
- List<String> partitionKeys = Arrays.asList("year", "month");
- RowType partitionType =
- RowType.builder()
- .field("year", DataTypes.INT())
- .field("month", DataTypes.INT())
- .build();
-
- BinaryRow partition =
- new
InternalRowSerializer(partitionType).toBinaryRow(GenericRow.of(2023, 2));
List<Pair<LinkedHashMap<String, String>, Path>> result =
FormatTableScan.generatePartitions(
partitionKeys,
partitionType,
"",
- tableLocation,
- Collections.singleton(partition));
+ defaultTableLocation,
+ Collections.singleton(partition),
+ enablePartitionValueOnly);
assertThat(result.size()).isEqualTo(1);
assertThat(result.get(0).getLeft().toString()).isEqualTo("{year=2023,
month=2}");
-
assertThat(result.get(0).getRight().toString()).isEqualTo("/test/table/year=2023/month=2");
+
assertThat(result.get(0).getRight().toString()).isEqualTo(partitionPath);
}
- @Test
+ @TestTemplate
void testGetScanPathAndLevelNullFilter() {
- Path tableLocation = new Path("/test/table");
- List<String> partitionKeys = Arrays.asList("year", "month");
- RowType partitionType =
- RowType.builder()
- .field("year", DataTypes.INT())
- .field("month", DataTypes.INT())
- .build();
-
Pair<Path, Integer> result =
FormatTableScan.computeScanPathAndLevel(
- tableLocation, partitionKeys, null, partitionType);
+ defaultTableLocation,
+ partitionType.getFieldNames(),
+ null,
+ partitionType,
+ false);
- assertThat(result.getLeft()).isEqualTo(tableLocation);
+ assertThat(result.getLeft()).isEqualTo(defaultTableLocation);
assertThat(result.getRight()).isEqualTo(2);
}
- @Test
+ @TestTemplate
void testGetScanPathAndLevelWithEqualityFilter() throws IOException {
Path tableLocation = new Path(tmpPath.toUri());
- List<String> partitionKeys = Arrays.asList("year", "month");
- RowType partitionType =
- RowType.builder()
- .field("year", DataTypes.INT())
- .field("month", DataTypes.INT())
- .build();
-
// Create equality predicate for all partition keys
PredicateBuilder builder = new PredicateBuilder(partitionType);
Predicate equalityPredicate =
@@ -160,29 +158,32 @@ class FormatTableScanTest {
Pair<Path, Integer> result =
FormatTableScan.computeScanPathAndLevel(
- tableLocation, partitionKeys, partitionFilter,
partitionType);
-
+ tableLocation,
+ partitionKeys,
+ partitionFilter,
+ partitionType,
+ enablePartitionValueOnly);
+ String partitionPath = enablePartitionValueOnly ? "2023/12" :
"year=2023/month=12";
// Should optimize to specific partition path
- assertThat(result.getLeft().toString()).isEqualTo(tableLocation +
"year=2023/month=12");
+ assertThat(result.getLeft().toString()).isEqualTo(tableLocation +
partitionPath);
assertThat(result.getRight()).isEqualTo(0);
// test searchPartSpecAndPaths
LocalFileIO fileIO = LocalFileIO.create();
- fileIO.mkdirs(new Path(tableLocation, "year=2023/month=12"));
+ fileIO.mkdirs(new Path(tableLocation, partitionPath));
List<Pair<LinkedHashMap<String, String>, Path>> searched =
- searchPartSpecAndPaths(fileIO, result.getLeft(),
result.getRight());
+ searchPartSpecAndPaths(
+ fileIO,
+ result.getLeft(),
+ result.getRight(),
+ null,
+ enablePartitionValueOnly);
assertThat(searched.size()).isEqualTo(1);
}
- @Test
+ @TestTemplate
void testComputeScanPathWithFirstLevel() throws IOException {
Path tableLocation = new Path(tmpPath.toUri());
- List<String> partitionKeys = Arrays.asList("year", "month");
- RowType partitionType =
- RowType.builder()
- .field("year", DataTypes.INT())
- .field("month", DataTypes.INT())
- .build();
// Create equality predicate for only the first partition key
PredicateBuilder builder = new PredicateBuilder(partitionType);
Predicate firstKeyEqualityPredicate = builder.equal(0, 2023);
@@ -191,30 +192,29 @@ class FormatTableScanTest {
Pair<Path, Integer> result =
FormatTableScan.computeScanPathAndLevel(
- tableLocation, partitionKeys, partitionFilter,
partitionType);
+ tableLocation,
+ partitionKeys,
+ partitionFilter,
+ partitionType,
+ enablePartitionValueOnly);
// Should optimize to specific partition path for first key
- assertThat(result.getLeft().toString()).isEqualTo(tableLocation +
"year=2023");
+ String partitionPath = enablePartitionValueOnly ? "2023" : "year=2023";
+ assertThat(result.getLeft().toString()).isEqualTo(tableLocation +
partitionPath);
assertThat(result.getRight()).isEqualTo(1);
// test searchPartSpecAndPaths
LocalFileIO fileIO = LocalFileIO.create();
- fileIO.mkdirs(new Path(tableLocation, "year=2023/month=12"));
+ partitionPath = enablePartitionValueOnly ? "2023/12" :
"year=2023/month=12";
+ fileIO.mkdirs(new Path(tableLocation, partitionPath));
List<Pair<LinkedHashMap<String, String>, Path>> searched =
- searchPartSpecAndPaths(fileIO, result.getLeft(),
result.getRight());
+ searchPartSpecAndPaths(fileIO, result.getLeft(),
result.getRight(), null, false);
assertThat(searched.size()).isEqualTo(1);
}
- @Test
+ @TestTemplate
void testComputeScanPathAndLevel() {
- Path tableLocation = new Path("/test/table");
- List<String> partitionKeys = Arrays.asList("year", "month");
- RowType partitionType =
- RowType.builder()
- .field("year", DataTypes.INT())
- .field("month", DataTypes.INT())
- .build();
-
+ Path tableLocation = new Path(tmpPath.toUri());
// Create non-equality predicate
PredicateBuilder builder = new PredicateBuilder(partitionType);
Predicate nonEqualityPredicate = builder.greaterThan(0, 2022);
@@ -223,22 +223,20 @@ class FormatTableScanTest {
Pair<Path, Integer> result =
FormatTableScan.computeScanPathAndLevel(
- tableLocation, partitionKeys, partitionFilter,
partitionType);
+ tableLocation,
+ partitionKeys,
+ partitionFilter,
+ partitionType,
+ enablePartitionValueOnly);
// Should not optimize, keep original path and level
assertThat(result.getLeft()).isEqualTo(tableLocation);
assertThat(result.getRight()).isEqualTo(2);
}
- @Test
+ @TestTemplate
void testComputeScanPathAndLevelWithOrPredicate() {
- Path tableLocation = new Path("/test/table");
- List<String> partitionKeys = Arrays.asList("year", "month");
- RowType partitionType =
- RowType.builder()
- .field("year", DataTypes.INT())
- .field("month", DataTypes.INT())
- .build();
+ Path tableLocation = new Path(tmpPath.toUri());
// Create OR predicate (not equality-only)
PredicateBuilder builder = new PredicateBuilder(partitionType);
@@ -248,14 +246,18 @@ class FormatTableScanTest {
Pair<Path, Integer> result =
FormatTableScan.computeScanPathAndLevel(
- tableLocation, partitionKeys, partitionFilter,
partitionType);
+ tableLocation,
+ partitionKeys,
+ partitionFilter,
+ partitionType,
+ enablePartitionValueOnly);
// Should not optimize, keep original path and level
assertThat(result.getLeft()).isEqualTo(tableLocation);
assertThat(result.getRight()).isEqualTo(2);
}
- @Test
+ @TestTemplate
void testExtractEqualityPartitionSpecWithLeadingConsecutiveEquality() {
List<String> partitionKeys = Arrays.asList("year", "month", "day");
RowType partitionType =
@@ -277,14 +279,19 @@ class FormatTableScanTest {
Path tableLocation = new Path("/test/table");
Pair<Path, Integer> result =
FormatTableScan.computeScanPathAndLevel(
- tableLocation, partitionKeys, partitionFilter,
partitionType);
+ tableLocation,
+ partitionKeys,
+ partitionFilter,
+ partitionType,
+ enablePartitionValueOnly);
// Should optimize to year and month path (leading consecutive
equality)
-
assertThat(result.getLeft().toString()).isEqualTo("/test/table/year=2023/month=12");
+ String partitionPath = enablePartitionValueOnly ? "/2023/12" :
"/year=2023/month=12";
+ assertThat(result.getLeft().toString()).isEqualTo(tableLocation +
partitionPath);
assertThat(result.getRight()).isEqualTo(1);
}
- @Test
+ @TestTemplate
void testExtractEqualityPartitionSpecWithNonConsecutiveEquality() {
List<String> partitionKeys = Arrays.asList("year", "month", "day");
RowType partitionType =
@@ -306,14 +313,19 @@ class FormatTableScanTest {
Path tableLocation = new Path("/test/table");
Pair<Path, Integer> result =
FormatTableScan.computeScanPathAndLevel(
- tableLocation, partitionKeys, partitionFilter,
partitionType);
+ tableLocation,
+ partitionKeys,
+ partitionFilter,
+ partitionType,
+ enablePartitionValueOnly);
// Should optimize only to year path (first equality, then stop at
non-equality)
-
assertThat(result.getLeft().toString()).isEqualTo("/test/table/year=2023");
+ String partitionPath = enablePartitionValueOnly ? "/2023" :
"/year=2023";
+ assertThat(result.getLeft().toString()).isEqualTo(tableLocation +
partitionPath);
assertThat(result.getRight()).isEqualTo(2);
}
- @Test
+ @TestTemplate
void testExtractEqualityPartitionSpecWithSecondPartitionKeyEqualityOnly() {
List<String> partitionKeys = Arrays.asList("year", "month", "day");
RowType partitionType =
@@ -335,14 +347,18 @@ class FormatTableScanTest {
Path tableLocation = new Path("/test/table");
Pair<Path, Integer> result =
FormatTableScan.computeScanPathAndLevel(
- tableLocation, partitionKeys, partitionFilter,
partitionType);
+ tableLocation,
+ partitionKeys,
+ partitionFilter,
+ partitionType,
+ enablePartitionValueOnly);
// Should not optimize because first partition key is not equality
assertThat(result.getLeft()).isEqualTo(tableLocation);
assertThat(result.getRight()).isEqualTo(3);
}
- @Test
+ @TestTemplate
void testExtractEqualityPartitionSpecWithAllEqualityConditions() {
List<String> partitionKeys = Arrays.asList("year", "month", "day");
RowType partitionType =
@@ -364,14 +380,20 @@ class FormatTableScanTest {
Path tableLocation = new Path("/test/table");
Pair<Path, Integer> result =
FormatTableScan.computeScanPathAndLevel(
- tableLocation, partitionKeys, partitionFilter,
partitionType);
+ tableLocation,
+ partitionKeys,
+ partitionFilter,
+ partitionType,
+ enablePartitionValueOnly);
// Should optimize to full partition path and no further filtering
needed
-
assertThat(result.getLeft().toString()).isEqualTo("/test/table/year=2023/month=12/day=25");
+ String partitionPath =
+ enablePartitionValueOnly ? "/2023/12/25" :
"/year=2023/month=12/day=25";
+ assertThat(result.getLeft().toString()).isEqualTo(tableLocation +
partitionPath);
assertThat(result.getRight()).isEqualTo(0);
}
- @Test
+ @TestTemplate
public void testExtractEqualityPartitionSpecWithAllEqualityWhenAllIsAnd() {
RowType type =
RowType.builder()
@@ -398,7 +420,7 @@ class FormatTableScanTest {
assertThat(result.get("day")).isEqualTo("25");
}
- @Test
+ @TestTemplate
public void
testExtractEqualityPartitionSpecWithLeadingConsecutiveEqualityWhenAllIsAnd() {
RowType type =
RowType.builder()
@@ -426,7 +448,7 @@ class FormatTableScanTest {
assertThat(result.containsKey("day")).isFalse();
}
- @Test
+ @TestTemplate
public void
testExtractEqualityPartitionSpecWithFirstPartitionKeyEqualityWhenAllIsAnd() {
RowType type =
RowType.builder()
@@ -452,7 +474,7 @@ class FormatTableScanTest {
assertThat(result.containsKey("day")).isFalse();
}
- @Test
+ @TestTemplate
public void
testExtractEqualityPartitionSpecWithNoLeadingEqualityWhenAllIsAnd() {
RowType type =
RowType.builder()
@@ -476,7 +498,7 @@ class FormatTableScanTest {
assertThat(result).isEmpty();
}
- @Test
+ @TestTemplate
public void
testExtractEqualityPartitionSpecWithNonEqualityPredicateWhenAllIsAnd() {
RowType type =
RowType.builder()
@@ -497,7 +519,7 @@ class FormatTableScanTest {
assertThat(result).isEmpty();
}
- @Test
+ @TestTemplate
public void
testExtractLeadingEqualityPartitionSpecWhenOnlyAndWithOrPredicate() {
RowType type =
RowType.builder()
diff --git a/paimon-core/src/test/java/org/apache/paimon/utils/PartitionStatisticsReporterTest.java b/paimon-core/src/test/java/org/apache/paimon/utils/PartitionStatisticsReporterTest.java
index 29c84bf347..5ffb50e95f 100644
--- a/paimon-core/src/test/java/org/apache/paimon/utils/PartitionStatisticsReporterTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/utils/PartitionStatisticsReporterTest.java
@@ -116,7 +116,8 @@ public class PartitionStatisticsReporterTest {
partitionParams.put(
PartitionPathUtils.generatePartitionPath(
partition.spec(),
-
table.rowType().project(table.partitionKeys())),
+
table.rowType().project(table.partitionKeys()),
+ false),
partition);
});
}
diff --git a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalog.java b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalog.java
index 5485fc6b6b..7cb38c3617 100644
--- a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalog.java
+++ b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalog.java
@@ -1087,8 +1087,7 @@ public class HiveCatalog extends AbstractCatalog {
// file.format option has a default value and cannot be empty.
FormatTable.Format provider =
FormatTable.parseFormat(coreOptions.formatType());
- Map<String, String> tblProperties = new HashMap<>();
-
+ Map<String, String> tblProperties = new
HashMap<>(tableSchema.options());
Table table = newHmsTable(identifier, tblProperties, provider,
externalTable);
updateHmsTable(table, identifier, tableSchema, provider, location);