This is an automated email from the ASF dual-hosted git repository.
etudenhoefner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 18de78eacb Core: Fix support for GenericManifestFile index projection (#12522)
18de78eacb is described below
commit 18de78eacba1f64de88fbe91c42ea1740377a1d2
Author: Daniel Weeks <[email protected]>
AuthorDate: Fri Mar 14 02:19:44 2025 -0700
Core: Fix support for GenericManifestFile index projection (#12522)
---
.../org/apache/iceberg/GenericManifestFile.java | 21 ++------
.../java/org/apache/iceberg/TestTableMetadata.java | 58 ++++++++++++++++++++++
2 files changed, 62 insertions(+), 17 deletions(-)
diff --git a/core/src/main/java/org/apache/iceberg/GenericManifestFile.java b/core/src/main/java/org/apache/iceberg/GenericManifestFile.java
index a079f5fb7b..9ccd59893d 100644
--- a/core/src/main/java/org/apache/iceberg/GenericManifestFile.java
+++ b/core/src/main/java/org/apache/iceberg/GenericManifestFile.java
@@ -33,6 +33,7 @@ import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
import org.apache.iceberg.relocated.com.google.common.base.Objects;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.ByteBuffers;
public class GenericManifestFile extends SupportsIndexProjection
@@ -67,24 +68,10 @@ public class GenericManifestFile extends SupportsIndexProjection
this.avroSchema = avroSchema;
}
- GenericManifestFile(InputFile file, int specId) {
- super(ManifestFile.schema().columns().size());
+ /** Used by Avro reflection to instantiate this class when reading manifest files. */
+ GenericManifestFile(Types.StructType projectedSchema) {
+ super(ManifestFile.schema().asStruct(), projectedSchema);
this.avroSchema = AVRO_SCHEMA;
- this.file = file;
- this.manifestPath = file.location();
- this.length = null; // lazily loaded from file
- this.specId = specId;
- this.sequenceNumber = 0;
- this.minSequenceNumber = 0;
- this.snapshotId = null;
- this.addedFilesCount = null;
- this.addedRowsCount = null;
- this.existingFilesCount = null;
- this.existingRowsCount = null;
- this.deletedFilesCount = null;
- this.deletedRowsCount = null;
- this.partitions = null;
- this.keyMetadata = null;
}
GenericManifestFile(InputFile file, int specId, long snapshotId) {
diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
index bff9e2a3a9..cb1ac9b953 100644
--- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
+++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
@@ -57,6 +57,7 @@ import org.apache.iceberg.TableMetadata.MetadataLogEntry;
import org.apache.iceberg.TableMetadata.SnapshotLogEntry;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.expressions.Expressions;
+import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
@@ -1616,6 +1617,63 @@ public class TestTableMetadata {
.doesNotContainKey(TableProperties.FORMAT_VERSION);
}
+ @Test
+ public void testManifestListIndexProjection() throws IOException {
+ long previousSnapshotId = System.currentTimeMillis() - new Random(1234).nextInt(3600);
+
+ String manifestListLocation =
+ createManifestListWithManifestFile(previousSnapshotId, null, "file:/tmp/manifest1.avro");
+
+ Schema manifestProjection =
+ ManifestFile.schema()
+ .select(
+ ManifestFile.PATH.name(),
+ ManifestFile.LENGTH.name(),
+ ManifestFile.SPEC_ID.name(),
+ ManifestFile.SNAPSHOT_ID.name());
+
+ try (CloseableIterable<ManifestFile> manifestIterable =
+ InternalData.read(FileFormat.AVRO, localInput(manifestListLocation))
+ .setRootType(GenericManifestFile.class)
+ .project(manifestProjection)
+ .reuseContainers()
+ .build()) {
+
+ List<ManifestFile> manifests = Lists.newArrayList(manifestIterable);
+ assertThat(manifests).hasSize(1);
+
+ manifests.forEach(
+ manifest -> {
+ // projected fields
+ assertThat(manifest)
+ .extracting(
+ ManifestFile::path,
+ ManifestFile::length,
+ ManifestFile::partitionSpecId,
+ ManifestFile::snapshotId)
+ .doesNotContainNull();
+
+ // not projected with defaults
+ assertThat(manifest.sequenceNumber()).isEqualTo(0);
+ assertThat(manifest.minSequenceNumber()).isEqualTo(0);
+ assertThat(manifest.content()).isEqualTo(ManifestContent.DATA);
+
+ // not projected
+ assertThat(manifest)
+ .extracting(
+ ManifestFile::addedFilesCount,
+ ManifestFile::existingFilesCount,
+ ManifestFile::deletedFilesCount,
+ ManifestFile::addedRowsCount,
+ ManifestFile::existingRowsCount,
+ ManifestFile::deletedRowsCount,
+ ManifestFile::partitions,
+ ManifestFile::keyMetadata)
+ .containsOnlyNulls();
+ });
+ }
+ }
+
private static Stream<Arguments> upgradeFormatVersionProvider() {
// return a stream of all valid upgrade paths
return IntStream.range(1, TableMetadata.SUPPORTED_TABLE_FORMAT_VERSION)