rdblue commented on code in PR #11904:
URL: https://github.com/apache/iceberg/pull/11904#discussion_r1906132860
##########
parquet/src/main/java/org/apache/iceberg/data/parquet/GenericParquetReaders.java:
##########
@@ -92,4 +127,232 @@ protected void set(Record struct, int pos, Object value) {
struct.set(pos, value);
}
}
+
+ private class LogicalTypeAnnotationParquetValueReaderVisitor
+ implements LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<ParquetValueReader<?>> {
+
+ private final ColumnDescriptor desc;
+ private final org.apache.iceberg.types.Type.PrimitiveType expected;
+ private final PrimitiveType primitive;
+
+ LogicalTypeAnnotationParquetValueReaderVisitor(
+ ColumnDescriptor desc,
+ org.apache.iceberg.types.Type.PrimitiveType expected,
+ PrimitiveType primitive) {
+ this.desc = desc;
+ this.expected = expected;
+ this.primitive = primitive;
+ }
+
+ @Override
+ public Optional<ParquetValueReader<?>> visit(
+ LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
+ return Optional.of(new ParquetValueReaders.StringReader(desc));
+ }
+
+ @Override
+ public Optional<ParquetValueReader<?>> visit(
+ LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) {
+ return Optional.of(new ParquetValueReaders.StringReader(desc));
+ }
+
+ @Override
+ public Optional<ParquetValueReader<?>> visit(
+ LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
+ switch (primitive.getPrimitiveTypeName()) {
+ case BINARY:
+ case FIXED_LEN_BYTE_ARRAY:
+ return Optional.of(
+ new ParquetValueReaders.BinaryAsDecimalReader(desc, decimalLogicalType.getScale()));
+ case INT64:
+ return Optional.of(
+ new ParquetValueReaders.LongAsDecimalReader(desc, decimalLogicalType.getScale()));
+ case INT32:
+ return Optional.of(
+ new ParquetValueReaders.IntegerAsDecimalReader(desc, decimalLogicalType.getScale()));
+ default:
+ throw new UnsupportedOperationException(
+ "Unsupported base type for decimal: " + primitive.getPrimitiveTypeName());
+ }
+ }
+
+ @Override
+ public Optional<ParquetValueReader<?>> visit(
+ LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) {
+ return Optional.of(new DateReader(desc));
+ }
+
+ @Override
+ public Optional<ParquetValueReader<?>> visit(
+ LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
+ if (timeLogicalType.getUnit() == LogicalTypeAnnotation.TimeUnit.MICROS) {
+ return Optional.of(new TimeReader(desc));
+ } else if (timeLogicalType.getUnit() == LogicalTypeAnnotation.TimeUnit.MILLIS) {
+ return Optional.of(new TimeMillisReader(desc));
+ }
+
+ return Optional.empty();
+ }
+
+ @Override
+ public Optional<ParquetValueReader<?>> visit(
+ LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) {
+ if (timestampLogicalType.getUnit() == LogicalTypeAnnotation.TimeUnit.MICROS) {
+ Types.TimestampType tsMicrosType = (Types.TimestampType) expected;
+ return tsMicrosType.shouldAdjustToUTC()
+ ? Optional.of(new TimestamptzReader(desc))
+ : Optional.of(new TimestampReader(desc));
+ } else if (timestampLogicalType.getUnit() == LogicalTypeAnnotation.TimeUnit.MILLIS) {
+ Types.TimestampType tsMillisType = (Types.TimestampType) expected;
+ return tsMillisType.shouldAdjustToUTC()
+ ? Optional.of(new TimestamptzMillisReader(desc))
+ : Optional.of(new TimestampMillisReader(desc));
+ }
+
+ return LogicalTypeAnnotation.LogicalTypeAnnotationVisitor.super.visit(timestampLogicalType);
+ }
+
+ @Override
+ public Optional<ParquetValueReader<?>> visit(
+ LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) {
+ if (intLogicalType.getBitWidth() == 64) {
+ return Optional.of(new ParquetValueReaders.UnboxedReader<>(desc));
+ }
+ return (expected.typeId() == org.apache.iceberg.types.Type.TypeID.LONG)
+ ? Optional.of(new ParquetValueReaders.IntAsLongReader(desc))
+ : Optional.of(new ParquetValueReaders.UnboxedReader<>(desc));
+ }
+
+ @Override
+ public Optional<ParquetValueReader<?>> visit(
+ LogicalTypeAnnotation.JsonLogicalTypeAnnotation jsonLogicalType) {
+ return Optional.of(new ParquetValueReaders.StringReader(desc));
+ }
+
+ @Override
+ public Optional<ParquetValueReader<?>> visit(
+ LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) {
+ return Optional.of(new ParquetValueReaders.BytesReader(desc));
+ }
+ }
+
+ private static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC);
+ private static final LocalDate EPOCH_DAY = EPOCH.toLocalDate();
+
+ private static class DateReader extends ParquetValueReaders.PrimitiveReader<LocalDate> {
Review Comment:
I agree with moving the date/time reader classes here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]