This is an automated email from the ASF dual-hosted git repository.
amoghj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 6b04a6d000 Core: Track data files by spec id instead of full PartitionSpec (#11323)
6b04a6d000 is described below
commit 6b04a6d000019a6182eb6521d1e7e4124a0cd73b
Author: Eduard Tudenhoefner <[email protected]>
AuthorDate: Fri Oct 25 16:25:23 2024 +0200
Core: Track data files by spec id instead of full PartitionSpec (#11323)
---
.../apache/iceberg/MergingSnapshotProducer.java | 23 +++++++++-------------
1 file changed, 9 insertions(+), 14 deletions(-)
diff --git a/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java b/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
index ab55f86ebf..50885dbb06 100644
--- a/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
+++ b/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
@@ -80,7 +80,7 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
private final ManifestFilterManager<DeleteFile> deleteFilterManager;
// update data
- private final Map<PartitionSpec, DataFileSet> newDataFilesBySpec = Maps.newHashMap();
+ private final Map<Integer, DataFileSet> newDataFilesBySpec = Maps.newHashMap();
private Long newDataFilesDataSequenceNumber;
private final Map<Integer, DeleteFileSet> newDeleteFilesBySpec = Maps.newHashMap();
private final List<ManifestFile> appendManifests = Lists.newArrayList();
@@ -138,18 +138,13 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
}
protected PartitionSpec dataSpec() {
- Set<PartitionSpec> specs = dataSpecs();
+ Set<Integer> specIds = newDataFilesBySpec.keySet();
Preconditions.checkState(
- specs.size() == 1,
- "Cannot return a single partition spec: data files with different partition specs have been added");
- return specs.iterator().next();
- }
-
- protected Set<PartitionSpec> dataSpecs() {
- Set<PartitionSpec> specs = newDataFilesBySpec.keySet();
+ !specIds.isEmpty(), "Cannot determine partition specs: no data files have been added");
Preconditions.checkState(
- !specs.isEmpty(), "Cannot determine partition specs: no data files have been added");
- return ImmutableSet.copyOf(specs);
+ specIds.size() == 1,
+ "Cannot return a single partition spec: data files with different partition specs have been added");
+ return spec(Iterables.getOnlyElement(specIds));
}
protected Expression rowFilter() {
@@ -237,7 +232,7 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
file.location());
DataFileSet dataFiles =
- newDataFilesBySpec.computeIfAbsent(spec, ignored -> DataFileSet.create());
+ newDataFilesBySpec.computeIfAbsent(spec.specId(), ignored -> DataFileSet.create());
if (dataFiles.add(file)) {
addedFilesSummary.addedFile(spec, file);
hasNewDataFiles = true;
@@ -971,9 +966,9 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
if (cachedNewDataManifests.isEmpty()) {
newDataFilesBySpec.forEach(
- (dataSpec, dataFiles) -> {
+ (specId, dataFiles) -> {
List<ManifestFile> newDataManifests =
- writeDataManifests(dataFiles, newDataFilesDataSequenceNumber, dataSpec);
+ writeDataManifests(dataFiles, newDataFilesDataSequenceNumber, spec(specId));
cachedNewDataManifests.addAll(newDataManifests);
});
this.hasNewDataFiles = false;