rdblue commented on a change in pull request #1197:
URL: https://github.com/apache/iceberg/pull/1197#discussion_r455328354



##########
File path: data/src/main/java/org/apache/iceberg/data/orc/GenericOrcWriter.java
##########
@@ -19,590 +19,119 @@
 
 package org.apache.iceberg.data.orc;
 
-import java.io.IOException;
-import java.math.BigDecimal;
-import java.nio.ByteBuffer;
-import java.nio.charset.StandardCharsets;
-import java.time.Instant;
-import java.time.LocalDate;
-import java.time.LocalDateTime;
-import java.time.LocalTime;
-import java.time.OffsetDateTime;
-import java.time.ZoneOffset;
-import java.time.temporal.ChronoUnit;
 import java.util.List;
-import java.util.Map;
-import java.util.UUID;
+import org.apache.iceberg.Schema;
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.orc.ORCSchemaUtil;
+import org.apache.iceberg.orc.OrcSchemaWithTypeVisitor;
 import org.apache.iceberg.orc.OrcValueWriter;
-import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
 import org.apache.orc.TypeDescription;
-import org.apache.orc.storage.common.type.HiveDecimal;
-import org.apache.orc.storage.ql.exec.vector.BytesColumnVector;
-import org.apache.orc.storage.ql.exec.vector.ColumnVector;
-import org.apache.orc.storage.ql.exec.vector.DecimalColumnVector;
-import org.apache.orc.storage.ql.exec.vector.DoubleColumnVector;
-import org.apache.orc.storage.ql.exec.vector.ListColumnVector;
-import org.apache.orc.storage.ql.exec.vector.LongColumnVector;
-import org.apache.orc.storage.ql.exec.vector.MapColumnVector;
-import org.apache.orc.storage.ql.exec.vector.StructColumnVector;
-import org.apache.orc.storage.ql.exec.vector.TimestampColumnVector;
 import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;
 
 public class GenericOrcWriter implements OrcValueWriter<Record> {
-  private final Converter[] converters;
-  private static final OffsetDateTime EPOCH = 
Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC);
-  private static final LocalDate EPOCH_DAY = EPOCH.toLocalDate();
-
-  private GenericOrcWriter(TypeDescription schema) {
-    this.converters = buildConverters(schema);
-  }
-
-  public static OrcValueWriter<Record> buildWriter(TypeDescription fileSchema) 
{
-    return new GenericOrcWriter(fileSchema);
+  private final GenericOrcWriters.Converter converter;
+
+  private GenericOrcWriter(Schema expectedSchema, TypeDescription orcSchema) {
+    Preconditions.checkArgument(orcSchema.getCategory() == 
TypeDescription.Category.STRUCT,
+        "Top level must be a struct " + orcSchema);
+
+    converter = OrcSchemaWithTypeVisitor.visit(expectedSchema, orcSchema, new 
WriteBuilder());
+  }
+
+  public static OrcValueWriter<Record> buildWriter(Schema expectedSchema, 
TypeDescription fileSchema) {
+    return new GenericOrcWriter(expectedSchema, fileSchema);
+  }
+
+  private static class WriteBuilder extends 
OrcSchemaWithTypeVisitor<GenericOrcWriters.Converter> {
+    private WriteBuilder() {
+    }
+
+    public GenericOrcWriters.Converter record(Types.StructType iStruct, 
TypeDescription record,
+                                              List<String> names, 
List<GenericOrcWriters.Converter> fields) {
+      return new GenericOrcWriters.RecordConverter(fields);
+    }
+
+    public GenericOrcWriters.Converter list(Types.ListType iList, 
TypeDescription array,
+                                            GenericOrcWriters.Converter 
element) {
+      return new GenericOrcWriters.ListConverter(element);
+    }
+
+    public GenericOrcWriters.Converter map(Types.MapType iMap, TypeDescription 
map,
+                                           GenericOrcWriters.Converter key, 
GenericOrcWriters.Converter value) {
+      return new GenericOrcWriters.MapConverter(key, value);
+    }
+
+    public GenericOrcWriters.Converter primitive(Type.PrimitiveType 
iPrimitive, TypeDescription schema) {
+      switch (schema.getCategory()) {
+        case BOOLEAN:
+          return GenericOrcWriters.booleans();
+        case BYTE:
+          return GenericOrcWriters.bytes();
+        case SHORT:
+          return GenericOrcWriters.shorts();
+        case DATE:
+          return GenericOrcWriters.dates();
+        case INT:
+          return GenericOrcWriters.ints();
+        case LONG:
+          String longAttributeValue = 
schema.getAttributeValue(ORCSchemaUtil.ICEBERG_LONG_TYPE_ATTRIBUTE);
+          ORCSchemaUtil.LongType longType = longAttributeValue == null ? 
ORCSchemaUtil.LongType.LONG :
+              ORCSchemaUtil.LongType.valueOf(longAttributeValue);
+          switch (longType) {
+            case TIME:
+              return GenericOrcWriters.times();
+            case LONG:
+              return GenericOrcWriters.longs();
+            default:
+              throw new IllegalStateException("Unhandled Long type found in 
ORC type attribute: " + longType);
+          }
+        case FLOAT:
+          return GenericOrcWriters.floats();
+        case DOUBLE:
+          return GenericOrcWriters.doubles();
+        case BINARY:
+          String binaryAttributeValue = 
schema.getAttributeValue(ORCSchemaUtil.ICEBERG_BINARY_TYPE_ATTRIBUTE);
+          ORCSchemaUtil.BinaryType binaryType = binaryAttributeValue == null ? 
ORCSchemaUtil.BinaryType.BINARY :
+              ORCSchemaUtil.BinaryType.valueOf(binaryAttributeValue);
+          switch (binaryType) {
+            case UUID:
+              return GenericOrcWriters.uuids();
+            case FIXED:
+              return GenericOrcWriters.fixed();
+            case BINARY:
+              return GenericOrcWriters.binary();
+            default:
+              throw new IllegalStateException("Unhandled Binary type found in 
ORC type attribute: " + binaryType);
+          }
+        case STRING:
+        case CHAR:
+        case VARCHAR:
+          return GenericOrcWriters.strings();
+        case DECIMAL:
+          return schema.getPrecision() <= 18 ? 
GenericOrcWriters.decimal18(schema) :
+              GenericOrcWriters.decimal38(schema);
+        case TIMESTAMP:
+          return GenericOrcWriters.timestamp();
+        case TIMESTAMP_INSTANT:
+          return GenericOrcWriters.timestampTz();
+      }
+      throw new IllegalArgumentException("Unhandled type " + schema);
+    }
   }
 
   @SuppressWarnings("unchecked")

Review comment:
       +1
   
   We should avoid using types without parameters.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to