SourabhBadhya commented on code in PR #5254: URL: https://github.com/apache/hive/pull/5254#discussion_r1677326958
########## iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergAcidUtil.java: ########## @@ -252,13 +289,63 @@ public boolean hasNext() { public T next() { T next = currentIterator.next(); GenericRecord rec = (GenericRecord) next; + IcebergAcidUtil.copyFields(rec, FILE_READ_META_COLS.size(), current.size(), current); + int specId = IcebergAcidUtil.parseSpecId(rec); PositionDeleteInfo.setIntoConf(conf, - IcebergAcidUtil.parseSpecId(rec), + specId, IcebergAcidUtil.computePartitionHash(rec), IcebergAcidUtil.parseFilePath(rec), - IcebergAcidUtil.parseFilePosition(rec)); - IcebergAcidUtil.copyFields(rec, FILE_READ_META_COLS.size(), current.size(), current); + IcebergAcidUtil.parseFilePosition(rec), + IcebergAcidUtil.getSerializedPartitionKey(current, table.specs().get(specId))); return (T) current; } } + + public static class MergeVirtualColumnAwareIterator<T> implements CloseableIterator<T> { + + private final CloseableIterator<T> currentIterator; + private GenericRecord current; + private final Schema expectedSchema; + private final Configuration conf; + private final int specId; + private final PartitionSpec partitionSpec; + private final StructLike partition; + + public MergeVirtualColumnAwareIterator(CloseableIterator<T> currentIterator, + Schema expectedSchema, Configuration conf, ContentFile contentFile, + Table table) { + this.currentIterator = currentIterator; + this.expectedSchema = expectedSchema; + this.conf = conf; + this.partition = contentFile.partition(); + current = GenericRecord.create( + new Schema(expectedSchema.columns().subList(0, expectedSchema.columns().size()))); + this.specId = contentFile.specId(); + + this.partitionSpec = table.specs().get(specId); + } + + @Override + public void close() throws IOException { + currentIterator.close(); + } + + @Override + public boolean hasNext() { + return currentIterator.hasNext(); + } + + @Override + public T next() { + T next = currentIterator.next(); + GenericRecord rec = 
(GenericRecord) next; + current.set(SERDE_META_COLS.get(MetadataColumns.SPEC_ID), specId); + current.set(SERDE_META_COLS.get(PARTITION_HASH_META_COL), computeHash(partition)); + current.set(SERDE_META_COLS.get(MetadataColumns.FILE_PATH), rec.get(0, String.class)); Review Comment: > IcebergAcidUtil.parseFilePath(rec) > IcebergAcidUtil.parseFilePosition(rec) These are used for getting the file path and file position on the writer side, whereas the lines here make sure the data is read according to their index. Added more information on these indices in the latest commit. ########## iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergAcidUtil.java: ########## @@ -252,13 +289,63 @@ public boolean hasNext() { public T next() { T next = currentIterator.next(); GenericRecord rec = (GenericRecord) next; + IcebergAcidUtil.copyFields(rec, FILE_READ_META_COLS.size(), current.size(), current); + int specId = IcebergAcidUtil.parseSpecId(rec); PositionDeleteInfo.setIntoConf(conf, - IcebergAcidUtil.parseSpecId(rec), + specId, IcebergAcidUtil.computePartitionHash(rec), IcebergAcidUtil.parseFilePath(rec), - IcebergAcidUtil.parseFilePosition(rec)); - IcebergAcidUtil.copyFields(rec, FILE_READ_META_COLS.size(), current.size(), current); + IcebergAcidUtil.parseFilePosition(rec), + IcebergAcidUtil.getSerializedPartitionKey(current, table.specs().get(specId))); return (T) current; } } + + public static class MergeVirtualColumnAwareIterator<T> implements CloseableIterator<T> { + + private final CloseableIterator<T> currentIterator; + private GenericRecord current; + private final Schema expectedSchema; + private final Configuration conf; + private final int specId; Review Comment: Done. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org