ajantha-bhat commented on code in PR #12102:
URL: https://github.com/apache/iceberg/pull/12102#discussion_r1931637285
##########
parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java:
##########
@@ -52,6 +59,81 @@ public static <T> ParquetValueReader<T> option(
return reader;
}
+ public static ParquetValueReader<Integer> unboxed(ColumnDescriptor desc) {
+ return new UnboxedReader<>(desc);
+ }
+
+ public static ParquetValueReader<String> strings(ColumnDescriptor desc) {
+ return new StringReader(desc);
+ }
+
+ public static ParquetValueReader<ByteBuffer> byteBuffers(ColumnDescriptor
desc) {
+ return new BytesReader(desc);
+ }
+
+ public static ParquetValueReader<Long> intsAsLongs(ColumnDescriptor desc) {
+ return new IntAsLongReader(desc);
+ }
+
+ public static ParquetValueReader<Double> floatsAsDoubles(ColumnDescriptor
desc) {
+ return new FloatAsDoubleReader(desc);
+ }
+
+ public static ParquetValueReader<BigDecimal> bigDecimals(ColumnDescriptor
desc) {
+ LogicalTypeAnnotation decimal =
desc.getPrimitiveType().getLogicalTypeAnnotation();
+ Preconditions.checkArgument(
+ decimal instanceof DecimalLogicalTypeAnnotation,
+ "Invalid timestamp logical type: " + decimal);
+
+ int scale = ((DecimalLogicalTypeAnnotation) decimal).getScale();
+
+ switch (desc.getPrimitiveType().getPrimitiveTypeName()) {
+ case FIXED_LEN_BYTE_ARRAY:
+ case BINARY:
+ return new BinaryAsDecimalReader(desc, scale);
+ case INT64:
+ return new LongAsDecimalReader(desc, scale);
+ case INT32:
+ return new IntegerAsDecimalReader(desc, scale);
+ }
+ throw new IllegalArgumentException(
Review Comment:
nit: we can add a blank line after the switch block.
##########
parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetReaders.java:
##########
@@ -148,96 +167,81 @@ public ParquetValueReader<?> struct(
}
}
- private class LogicalTypeAnnotationParquetValueReaderVisitor
- implements
LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<ParquetValueReader<?>> {
+ private class LogicalTypeReadBuilder
+ implements LogicalTypeAnnotationVisitor<ParquetValueReader<?>> {
private final ColumnDescriptor desc;
private final org.apache.iceberg.types.Type.PrimitiveType expected;
- private final PrimitiveType primitive;
- LogicalTypeAnnotationParquetValueReaderVisitor(
- ColumnDescriptor desc,
- org.apache.iceberg.types.Type.PrimitiveType expected,
- PrimitiveType primitive) {
+ LogicalTypeReadBuilder(
+ ColumnDescriptor desc, org.apache.iceberg.types.Type.PrimitiveType
expected) {
this.desc = desc;
this.expected = expected;
- this.primitive = primitive;
}
@Override
- public Optional<ParquetValueReader<?>> visit(
- LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
- return Optional.of(new ParquetValueReaders.StringReader(desc));
+ public Optional<ParquetValueReader<?>> visit(StringLogicalTypeAnnotation
stringLogicalType) {
+ return Optional.of(ParquetValueReaders.strings(desc));
}
@Override
- public Optional<ParquetValueReader<?>> visit(
- LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) {
- return Optional.of(new ParquetValueReaders.StringReader(desc));
+ public Optional<ParquetValueReader<?>> visit(EnumLogicalTypeAnnotation
enumLogicalType) {
+ return Optional.of(ParquetValueReaders.strings(desc));
}
@Override
public Optional<ParquetValueReader<?>> visit(DecimalLogicalTypeAnnotation
decimalLogicalType) {
- switch (primitive.getPrimitiveTypeName()) {
- case BINARY:
- case FIXED_LEN_BYTE_ARRAY:
- return Optional.of(
- new ParquetValueReaders.BinaryAsDecimalReader(desc,
decimalLogicalType.getScale()));
- case INT64:
- return Optional.of(
- new ParquetValueReaders.LongAsDecimalReader(desc,
decimalLogicalType.getScale()));
- case INT32:
- return Optional.of(
- new ParquetValueReaders.IntegerAsDecimalReader(desc,
decimalLogicalType.getScale()));
- default:
- throw new UnsupportedOperationException(
- "Unsupported base type for decimal: " +
primitive.getPrimitiveTypeName());
- }
+ return Optional.of(ParquetValueReaders.bigDecimals(desc));
}
@Override
- public Optional<ParquetValueReader<?>> visit(
- LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) {
+ public Optional<ParquetValueReader<?>> visit(DateLogicalTypeAnnotation
dateLogicalType) {
return Optional.of(dateReader(desc));
}
@Override
- public Optional<ParquetValueReader<?>> visit(
- LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
- return Optional.of(timeReader(desc, timeLogicalType.getUnit()));
+ public Optional<ParquetValueReader<?>> visit(TimeLogicalTypeAnnotation
timeLogicalType) {
+ return Optional.of(timeReader(desc));
}
@Override
public Optional<ParquetValueReader<?>> visit(
- LogicalTypeAnnotation.TimestampLogicalTypeAnnotation
timestampLogicalType) {
+ TimestampLogicalTypeAnnotation timestampLogicalType) {
return Optional.of(
- timestampReader(
- desc,
- timestampLogicalType.getUnit(),
- ((Types.TimestampType) expected).shouldAdjustToUTC()));
+ timestampReader(desc, ((Types.TimestampType)
expected).shouldAdjustToUTC()));
}
@Override
- public Optional<ParquetValueReader<?>> visit(
- LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) {
+ public Optional<ParquetValueReader<?>> visit(IntLogicalTypeAnnotation
intLogicalType) {
if (intLogicalType.getBitWidth() == 64) {
+ if (intLogicalType.isSigned()) {
+ // this will throw an UnsupportedOperationException
Review Comment:
nit: Just wondering, why not replace this with a Preconditions check like the
newly added code below? With that, we can have a clear error message such as
"64 bit int logical type must be unsigned".
Right now it just says unsupported logical type INTEGER, which might confuse
the user.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]