rdsr commented on a change in pull request #1197:
URL: https://github.com/apache/iceberg/pull/1197#discussion_r454019685
##########
File path: data/src/main/java/org/apache/iceberg/data/orc/GenericOrcWriter.java
##########
@@ -19,590 +19,119 @@
package org.apache.iceberg.data.orc;
-import java.io.IOException;
-import java.math.BigDecimal;
-import java.nio.ByteBuffer;
-import java.nio.charset.StandardCharsets;
-import java.time.Instant;
-import java.time.LocalDate;
-import java.time.LocalDateTime;
-import java.time.LocalTime;
-import java.time.OffsetDateTime;
-import java.time.ZoneOffset;
-import java.time.temporal.ChronoUnit;
import java.util.List;
-import java.util.Map;
-import java.util.UUID;
+import org.apache.iceberg.Schema;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.orc.ORCSchemaUtil;
+import org.apache.iceberg.orc.OrcSchemaWithTypeVisitor;
import org.apache.iceberg.orc.OrcValueWriter;
-import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
import org.apache.orc.TypeDescription;
-import org.apache.orc.storage.common.type.HiveDecimal;
-import org.apache.orc.storage.ql.exec.vector.BytesColumnVector;
-import org.apache.orc.storage.ql.exec.vector.ColumnVector;
-import org.apache.orc.storage.ql.exec.vector.DecimalColumnVector;
-import org.apache.orc.storage.ql.exec.vector.DoubleColumnVector;
-import org.apache.orc.storage.ql.exec.vector.ListColumnVector;
-import org.apache.orc.storage.ql.exec.vector.LongColumnVector;
-import org.apache.orc.storage.ql.exec.vector.MapColumnVector;
-import org.apache.orc.storage.ql.exec.vector.StructColumnVector;
-import org.apache.orc.storage.ql.exec.vector.TimestampColumnVector;
import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;
public class GenericOrcWriter implements OrcValueWriter<Record> {
- private final Converter[] converters;
- private static final OffsetDateTime EPOCH =
Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC);
- private static final LocalDate EPOCH_DAY = EPOCH.toLocalDate();
-
- private GenericOrcWriter(TypeDescription schema) {
- this.converters = buildConverters(schema);
- }
-
- public static OrcValueWriter<Record> buildWriter(TypeDescription fileSchema)
{
- return new GenericOrcWriter(fileSchema);
+ private final GenericOrcWriters.Converter converter;
+
+ private GenericOrcWriter(Schema expectedSchema, TypeDescription orcSchema) {
+ Preconditions.checkArgument(orcSchema.getCategory() ==
TypeDescription.Category.STRUCT,
+ "Top level must be a struct " + orcSchema);
+
+ converter = OrcSchemaWithTypeVisitor.visit(expectedSchema, orcSchema, new
WriteBuilder());
+ }
+
+ public static OrcValueWriter<Record> buildWriter(Schema expectedSchema,
TypeDescription fileSchema) {
+ return new GenericOrcWriter(expectedSchema, fileSchema);
+ }
+
+ private static class WriteBuilder extends
OrcSchemaWithTypeVisitor<GenericOrcWriters.Converter> {
+ private WriteBuilder() {
+ }
+
+ public GenericOrcWriters.Converter record(Types.StructType iStruct,
TypeDescription record,
Review comment:
We should add the correct type parameter to the return type. E.g., for
record we should return `OrcValueWriter<Record>`, and similarly for map and list.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]