rdblue commented on a change in pull request #1213:
URL: https://github.com/apache/iceberg/pull/1213#discussion_r459165341
##########
File path: core/src/main/java/org/apache/iceberg/taskio/PartitionedWriter.java
##########
@@ -17,41 +17,40 @@
* under the License.
*/
-package org.apache.iceberg.spark.source;
+package org.apache.iceberg.taskio;
import java.io.IOException;
import java.util.Set;
+import java.util.function.Function;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.PartitionKey;
import org.apache.iceberg.PartitionSpec;
-import org.apache.iceberg.Schema;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
-import org.apache.iceberg.spark.SparkSchemaUtil;
-import org.apache.spark.sql.catalyst.InternalRow;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-class PartitionedWriter extends BaseWriter {
+public class PartitionedWriter<T> extends BaseTaskWriter<T> {
private static final Logger LOG =
LoggerFactory.getLogger(PartitionedWriter.class);
- private final PartitionKey key;
- private final InternalRowWrapper wrapper;
+ private final Function<T, PartitionKey> keyGetter;
private final Set<PartitionKey> completedPartitions = Sets.newHashSet();
- PartitionedWriter(PartitionSpec spec, FileFormat format,
SparkAppenderFactory appenderFactory,
- OutputFileFactory fileFactory, FileIO io, long
targetFileSize, Schema writeSchema) {
+ private PartitionKey currentKey = null;
+ private WrappedFileAppender currentAppender = null;
+
+ public PartitionedWriter(PartitionSpec spec, FileFormat format,
FileAppenderFactory<T> appenderFactory,
+ OutputFileFactory fileFactory, FileIO io, long
targetFileSize,
+ Function<T, PartitionKey> keyGetter) {
super(spec, format, appenderFactory, fileFactory, io, targetFileSize);
- this.key = new PartitionKey(spec, writeSchema);
- this.wrapper = new
InternalRowWrapper(SparkSchemaUtil.convert(writeSchema));
+ this.keyGetter = keyGetter;
Review comment:
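Like the other partitioned writer, I think this should use an abstract
method, implemented by subclasses, to produce the partition key for a row
instead of taking a `Function<T, PartitionKey>` key getter in the constructor.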
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]