stevenzwu commented on code in PR #12298:
URL: https://github.com/apache/iceberg/pull/12298#discussion_r2377014431
##########
flink/v2.0/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkAppenderFactory.java:
##########
@@ -55,12 +53,11 @@ public class FlinkAppenderFactory implements
FileAppenderFactory<RowData>, Seria
private final PartitionSpec spec;
private final int[] equalityFieldIds;
private final Schema eqDeleteRowSchema;
- private final Schema posDeleteRowSchema;
private final Table table;
private RowType eqDeleteFlinkSchema = null;
- private RowType posDeleteFlinkSchema = null;
+ @Deprecated
Review Comment:
nit: add Javadoc stating the version in which this will be removed, like the other deprecation notices
##########
core/src/main/java/org/apache/iceberg/deletes/EqualityDeleteWriter.java:
##########
@@ -52,12 +52,44 @@ public EqualityDeleteWriter(
EncryptionKeyMetadata keyMetadata,
SortOrder sortOrder,
int... equalityFieldIds) {
+ this(
+ appender,
+ format,
+ location,
+ spec,
+ partition,
+ keyMetadata != null ? keyMetadata.buffer() : null,
+ sortOrder,
+ equalityFieldIds);
+ }
+
+ public EqualityDeleteWriter(EqualityDeleteWriter<T> wrapped) {
+ this(
+ wrapped.appender,
+ wrapped.format,
+ wrapped.location,
+ wrapped.spec,
+ wrapped.partition,
+ wrapped.keyMetadata,
+ wrapped.sortOrder,
+ wrapped.equalityFieldIds);
+ }
+
+ private EqualityDeleteWriter(
Review Comment:
This refactoring doesn't seem to be required. It also doesn't seem to save
any lines of code.
##########
flink/v2.0/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkFileWriterFactory.java:
##########
@@ -65,111 +54,26 @@ class FlinkFileWriterFactory extends
BaseFileWriterFactory<RowData> implements S
super(
table,
dataFileFormat,
+ RowData.class,
dataSchema,
dataSortOrder,
deleteFileFormat,
equalityFieldIds,
equalityDeleteRowSchema,
equalityDeleteSortOrder,
- positionDeleteRowSchema);
-
- this.dataFlinkType = dataFlinkType;
- this.equalityDeleteFlinkType = equalityDeleteFlinkType;
- this.positionDeleteFlinkType = positionDeleteFlinkType;
+ ImmutableMap.of(),
+ dataFlinkType == null ? FlinkSchemaUtil.convert(dataSchema) :
dataFlinkType,
+ equalityDeleteFlinkType == null
Review Comment:
nit: the nested ternary operator is a bit hard to read. Maybe change it to a
small method with if-else code?
##########
spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/source/SparkFileWriterFactory.java:
##########
@@ -293,4 +217,48 @@ SparkFileWriterFactory build() {
writeProperties);
}
}
+
+ private static StructType calculateSparkTypeForDelete(StructType sparkType,
Schema schema) {
+ if (sparkType != null) {
+ // The delete types need to have the correct metadata columns.
+ if (sparkType.fields().length < 3) {
+ return PATH_POS_TYPE;
+ } else {
+ StructField rowField =
+
sparkType.fields()[sparkType.fieldIndex(MetadataColumns.DELETE_FILE_ROW_FIELD_NAME)];
+ return new StructType(
+ new StructField[] {
+ new StructField(
+ MetadataColumns.DELETE_FILE_PATH.name(),
+ DataTypes.StringType,
+ false,
+ Metadata.empty()),
+ new StructField(
+ MetadataColumns.DELETE_FILE_POS.name(),
+ DataTypes.LongType,
+ false,
+ Metadata.empty()),
+ new StructField(
+ MetadataColumns.DELETE_FILE_ROW_FIELD_NAME,
+ rowField.dataType(),
+ false,
+ Metadata.empty())
+ });
+ }
+ } else if (schema != null) {
+ return SparkSchemaUtil.convert(schema);
+ } else {
+ return null;
+ }
+ }
+
+ private static StructType calculateSparkType(StructType sparkType, Schema
schema) {
+ if (sparkType != null) {
+ return sparkType;
+ } else if (schema != null) {
+ return SparkSchemaUtil.convert(schema);
+ } else {
+ return null;
Review Comment:
This should throw an exception: one of the two args should be non-null. That
will also match the old behavior.
##########
spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/source/SparkFileWriterFactory.java:
##########
@@ -293,4 +217,48 @@ SparkFileWriterFactory build() {
writeProperties);
}
}
+
+ private static StructType calculateSparkTypeForDelete(StructType sparkType,
Schema schema) {
Review Comment:
This method doesn't seem to be used.
##########
parquet/src/main/java/org/apache/iceberg/parquet/ParquetFormatModel.java:
##########
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.parquet;
+
+import java.util.Map;
+import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.io.FormatModel;
+import org.apache.iceberg.io.InputFile;
+import org.apache.iceberg.io.OutputFile;
+import org.apache.parquet.schema.MessageType;
+
+public class ParquetFormatModel<D, S, F> implements FormatModel<D, S> {
+ private final Class<D> type;
+ private final ReaderFunction<D> readerFunction;
+ private final BatchReaderFunction<D, F> batchReaderFunction;
+ private final WriterFunction<D, S> writerFunction;
+
+ private ParquetFormatModel(
+ Class<D> type,
+ ReaderFunction<D> readerFunction,
+ BatchReaderFunction<D, F> batchReaderFunction,
+ WriterFunction<D, S> writerFunction) {
+ this.type = type;
+ this.readerFunction = readerFunction;
+ this.batchReaderFunction = batchReaderFunction;
+ this.writerFunction = writerFunction;
+ }
+
+ public ParquetFormatModel(Class<D> type) {
Review Comment:
Is there any value in providing this constructor, where the reader and writer
functions are null? Is the constructed object usable?
##########
core/src/main/java/org/apache/iceberg/data/RegistryBasedFileWriterFactory.java:
##########
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.data;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.Map;
+import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.MetricsConfig;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.SortOrder;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.deletes.EqualityDeleteWriter;
+import org.apache.iceberg.deletes.PositionDeleteWriter;
+import org.apache.iceberg.encryption.EncryptedOutputFile;
+import org.apache.iceberg.encryption.EncryptionKeyMetadata;
+import org.apache.iceberg.io.DataWriter;
+import org.apache.iceberg.io.FileWriterFactory;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+
+/**
+ * A base writer factory to be extended by query engine integrations.
+ *
+ * @param <T> type of the engine specific records
Review Comment:
nit: simplify as `<T> row type`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]