rdblue commented on code in PR #15854:
URL: https://github.com/apache/iceberg/pull/15854#discussion_r3048506799


##########
core/src/main/java/org/apache/iceberg/TrackedFileStruct.java:
##########
@@ -0,0 +1,737 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.stats.BaseContentStats;
+import org.apache.iceberg.stats.ContentStats;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.ArrayUtil;
+import org.apache.iceberg.util.ByteBuffers;
+
+/** Mutable {@link StructLike} implementation of {@link TrackedFile}. */
+class TrackedFileStruct implements TrackedFile, StructLike {
+  private static final FileContent[] FILE_CONTENT_VALUES = 
FileContent.values();
+  private static final EntryStatus[] STATUS_VALUES = EntryStatus.values();
+
+  private TrackingStruct tracking;
+  private FileContent contentType = FileContent.DATA;
+  private String location;
+  private FileFormat fileFormat;
+  private long recordCount;
+  private long fileSizeInBytes;
+  private Integer specId;
+  private ContentStats contentStats;
+  private Integer sortOrderId;
+  private DeletionVectorStruct deletionVector;
+  private ManifestInfoStruct manifestInfo;
+  private ByteBuffer keyMetadata;
+  private long[] splitOffsets;
+  private int[] equalityIds;
+  private String manifestLocation;
+  private long manifestPos;
+
+  TrackedFileStruct(Types.StructType type) {}
+
+  /** Copy constructor. */
+  private TrackedFileStruct(TrackedFileStruct toCopy, boolean withStats, 
Set<Integer> statsIds) {
+    this.tracking = toCopy.tracking != null ? toCopy.tracking.copy() : null;
+    this.contentType = toCopy.contentType;
+    this.location = toCopy.location;
+    this.fileFormat = toCopy.fileFormat;
+    this.recordCount = toCopy.recordCount;
+    this.fileSizeInBytes = toCopy.fileSizeInBytes;
+    this.specId = toCopy.specId;
+    this.sortOrderId = toCopy.sortOrderId;
+    this.deletionVector = toCopy.deletionVector != null ? 
toCopy.deletionVector.copy() : null;
+
+    if (withStats && toCopy.contentStats != null) {
+      ContentStats filtered = BaseContentStats.buildFrom(toCopy.contentStats, 
statsIds).build();
+      this.contentStats = filtered.fieldStats().isEmpty() ? null : filtered;
+    } else {
+      this.contentStats = null;
+    }
+
+    this.manifestInfo = toCopy.manifestInfo != null ? 
toCopy.manifestInfo.copy() : null;
+    this.keyMetadata = toCopy.keyMetadata != null ? 
ByteBuffers.copy(toCopy.keyMetadata) : null;
+    this.splitOffsets =
+        toCopy.splitOffsets != null
+            ? Arrays.copyOf(toCopy.splitOffsets, toCopy.splitOffsets.length)
+            : null;
+    this.equalityIds =
+        toCopy.equalityIds != null
+            ? Arrays.copyOf(toCopy.equalityIds, toCopy.equalityIds.length)
+            : null;
+    this.manifestLocation = toCopy.manifestLocation;
+    this.manifestPos = toCopy.manifestPos;
+  }
+
+  @Override
+  public Tracking tracking() {
+    return tracking;
+  }
+
+  TrackingStruct trackingStruct() {
+    return tracking;
+  }
+
+  @Override
+  public FileContent contentType() {
+    return contentType;
+  }
+
+  @Override
+  public String location() {
+    return location;
+  }
+
+  @Override
+  public FileFormat fileFormat() {
+    return fileFormat;
+  }
+
+  @Override
+  public long recordCount() {
+    return recordCount;
+  }
+
+  @Override
+  public long fileSizeInBytes() {
+    return fileSizeInBytes;
+  }
+
+  @Override
+  public Integer specId() {
+    return specId;
+  }
+
+  @Override
+  public ContentStats contentStats() {
+    return (ContentStats) contentStats;
+  }
+
+  @Override
+  public Integer sortOrderId() {
+    return sortOrderId;
+  }
+
+  @Override
+  public DeletionVector deletionVector() {
+    return deletionVector;
+  }
+
+  @Override
+  public ManifestInfo manifestInfo() {
+    return manifestInfo;
+  }
+
+  @Override
+  public ByteBuffer keyMetadata() {
+    return keyMetadata;
+  }
+
+  @Override
+  public List<Long> splitOffsets() {
+    return splitOffsets != null ? 
ArrayUtil.toUnmodifiableLongList(splitOffsets) : null;
+  }
+
+  @Override
+  public List<Integer> equalityIds() {
+    return equalityIds != null ? ArrayUtil.toUnmodifiableIntList(equalityIds) 
: null;
+  }
+
+  @Override
+  public TrackedFile copy() {
+    return new TrackedFileStruct(this, true, null);
+  }
+
+  @Override
+  public TrackedFile copyWithStats(Set<Integer> requestedColumnIds) {
+    return new TrackedFileStruct(this, true, requestedColumnIds);
+  }
+
+  @Override
+  public String manifestLocation() {
+    return manifestLocation;
+  }
+
+  @Override
+  public long manifestPos() {
+    return manifestPos;
+  }
+
+  void setManifestLocation(String newManifestLocation) {
+    this.manifestLocation = newManifestLocation;
+  }
+
+  void setManifestPos(long newManifestPos) {
+    this.manifestPos = newManifestPos;
+  }
+
+  // StructLike implementation - field ordinals match TrackedFile.schemaWithContentStats() order
+
+  @Override
+  public int size() {
+    return 14;
+  }
+
+  @Override
+  @SuppressWarnings("unchecked")
+  public <T> T get(int pos, Class<T> javaClass) {
+    Object value;
+    switch (pos) {
+      case 0:
+        value = tracking;
+        break;
+      case 1:
+        value = contentType.id();
+        break;
+      case 2:
+        value = location;
+        break;
+      case 3:
+        value = fileFormat != null ? fileFormat.toString() : null;
+        break;
+      case 4:
+        value = recordCount;
+        break;
+      case 5:
+        value = fileSizeInBytes;
+        break;
+      case 6:
+        value = specId;
+        break;
+      case 7:
+        value = contentStats;
+        break;
+      case 8:
+        value = sortOrderId;
+        break;
+      case 9:
+        value = deletionVector;
+        break;
+      case 10:
+        value = manifestInfo;
+        break;
+      case 11:
+        value = keyMetadata;
+        break;
+      case 12:
+        value = splitOffsets();
+        break;
+      case 13:
+        value = equalityIds();
+        break;
+      default:
+        throw new UnsupportedOperationException("Unknown field ordinal: " + 
pos);
+    }
+
+    return (T) value;
+  }
+
+  @Override
+  @SuppressWarnings("unchecked")
+  public <T> void set(int pos, T value) {
+    switch (pos) {
+      case 0:
+        this.tracking = (TrackingStruct) value;
+        break;
+      case 1:
+        this.contentType = FILE_CONTENT_VALUES[(Integer) value];
+        break;
+      case 2:
+        this.location = (String) value;
+        break;
+      case 3:
+        this.fileFormat = FileFormat.fromString(value.toString());
+        break;
+      case 4:
+        this.recordCount = (Long) value;
+        break;
+      case 5:
+        this.fileSizeInBytes = (Long) value;
+        break;
+      case 6:
+        this.specId = (Integer) value;
+        break;
+      case 7:
+        Preconditions.checkArgument(
+            value == null || value instanceof ContentStats,
+            "Expected ContentStats but found: %s",
+            value == null ? "null" : value.getClass().getName());
+
+        this.contentStats = (ContentStats) value;
+        break;
+      case 8:
+        this.sortOrderId = (Integer) value;
+        break;
+      case 9:
+        this.deletionVector = (DeletionVectorStruct) value;
+        break;
+      case 10:
+        this.manifestInfo = (ManifestInfoStruct) value;
+        break;
+      case 11:
+        this.keyMetadata = (ByteBuffer) value;
+        break;
+      case 12:
+        this.splitOffsets = ArrayUtil.toLongArray((List<Long>) value);
+        break;
+      case 13:
+        this.equalityIds = ArrayUtil.toIntArray((List<Integer>) value);
+        break;
+      default:
+        throw new UnsupportedOperationException("Unknown field ordinal: " + 
pos);
+    }
+  }
+
+  /** Mutable {@link StructLike} implementation of {@link Tracking}. */
+  static class TrackingStruct implements Tracking, StructLike {
+    private EntryStatus status = EntryStatus.EXISTING;
+    private Long snapshotId;
+    private Long sequenceNumber;
+    private Long fileSequenceNumber;
+    private Long dvSnapshotId;
+    private Long firstRowId;
+    private ByteBuffer deletedPositions;
+    private ByteBuffer replacedPositions;
+
+    TrackingStruct(Types.StructType type) {}
+
+    private TrackingStruct(TrackingStruct toCopy) {
+      this.status = toCopy.status;
+      this.snapshotId = toCopy.snapshotId;
+      this.sequenceNumber = toCopy.sequenceNumber;
+      this.fileSequenceNumber = toCopy.fileSequenceNumber;
+      this.dvSnapshotId = toCopy.dvSnapshotId;
+      this.firstRowId = toCopy.firstRowId;
+      this.deletedPositions =
+          toCopy.deletedPositions != null ? 
ByteBuffers.copy(toCopy.deletedPositions) : null;
+      this.replacedPositions =
+          toCopy.replacedPositions != null ? 
ByteBuffers.copy(toCopy.replacedPositions) : null;
+    }
+
+    TrackingStruct copy() {
+      return new TrackingStruct(this);
+    }
+
+    @Override
+    public EntryStatus status() {
+      return status;
+    }
+
+    @Override
+    public Long snapshotId() {
+      return snapshotId;
+    }
+
+    @Override
+    public Long dataSequenceNumber() {
+      return sequenceNumber;
+    }
+
+    @Override
+    public Long fileSequenceNumber() {
+      return fileSequenceNumber;
+    }
+
+    @Override
+    public Long dvSnapshotId() {
+      return dvSnapshotId;
+    }
+
+    @Override
+    public Long firstRowId() {
+      return firstRowId;
+    }
+
+    @Override
+    public ByteBuffer deletedPositions() {
+      return deletedPositions;
+    }
+
+    @Override
+    public ByteBuffer replacedPositions() {
+      return replacedPositions;
+    }
+
+    void setSnapshotId(Long newSnapshotId) {
+      this.snapshotId = newSnapshotId;
+    }
+
+    void setSequenceNumber(Long newSequenceNumber) {
+      this.sequenceNumber = newSequenceNumber;
+    }
+
+    void setFirstRowId(Long newFirstRowId) {

Review Comment:
   I realize that this is copying the API used by 
[`InheritableMetadataFactory`](https://github.com/apache/iceberg/blob/main/core/src/main/java/org/apache/iceberg/InheritableMetadataFactory.java#L64),
but I think there is a better way to inherit metadata than exposing setters, 
checking for a specific class (`BaseFile`), and setting defaults on each row.
   
   I think it would be cleaner to have a method that sets the `ManifestFile` 
that this file is being read from. Then the `set` method can detect null values 
and inject the correct default from the manifest. That would work well for 
snapshot ID, sequence numbers, manifest location, and spec ID. It might also 
allow us to fix up the interface, because those values would then always be set 
(although they may not be if we want to be able to access the values for a file 
before it is written to a manifest).
   
   That doesn't work well for the position ordinal, but we want the reader to 
supply that using the 
[`PositionReader`](https://github.com/apache/iceberg/blob/main/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java#L351).
It also doesn't work well for first row ID, but for that we could set a 
separate tracker that is based on the manifest file.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to