parthchandra commented on code in PR #2078:
URL: https://github.com/apache/datafusion-comet/pull/2078#discussion_r2263158429
##########
common/src/main/java/org/apache/comet/parquet/Utils.java:
##########
@@ -453,4 +457,60 @@ private static LogicalTypeAnnotation
reconstructLogicalType(
throw new IllegalArgumentException("Unknown logical type: " +
logicalTypeName);
}
}
+
+ public static ParquetColumnSpec
descriptorToParquetColumnSpec(ColumnDescriptor descriptor) {
+
+ String[] path = descriptor.getPath();
+ PrimitiveType primitiveType = descriptor.getPrimitiveType();
+ String physicalType = primitiveType.getPrimitiveTypeName().name();
+
+ int typeLength =
+ primitiveType.getPrimitiveTypeName() ==
PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY
+ ? primitiveType.getTypeLength()
+ : 0;
+
+ boolean isRepeated = primitiveType.getRepetition() ==
Type.Repetition.REPEATED;
+
+ String logicalTypeName = null;
+ Map<String, String> logicalTypeParams = new HashMap<>();
+ LogicalTypeAnnotation logicalType =
primitiveType.getLogicalTypeAnnotation();
+
+ if (logicalType != null) {
+ logicalTypeName = logicalType.getClass().getSimpleName();
+
+ // Handle specific logical types
+ if (logicalType instanceof
LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) {
+ LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimal =
+ (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalType;
+ logicalTypeParams.put("precision",
String.valueOf(decimal.getPrecision()));
+ logicalTypeParams.put("scale", String.valueOf(decimal.getScale()));
+ } else if (logicalType instanceof
LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) {
+ LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestamp =
+ (LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) logicalType;
+ logicalTypeParams.put("isAdjustedToUTC",
String.valueOf(timestamp.isAdjustedToUTC()));
+ logicalTypeParams.put("unit", timestamp.getUnit().name());
+ } else if (logicalType instanceof
LogicalTypeAnnotation.TimeLogicalTypeAnnotation) {
+ LogicalTypeAnnotation.TimeLogicalTypeAnnotation time =
+ (LogicalTypeAnnotation.TimeLogicalTypeAnnotation) logicalType;
+ logicalTypeParams.put("isAdjustedToUTC",
String.valueOf(time.isAdjustedToUTC()));
+ logicalTypeParams.put("unit", time.getUnit().name());
+ } else if (logicalType instanceof
LogicalTypeAnnotation.IntLogicalTypeAnnotation) {
+ LogicalTypeAnnotation.IntLogicalTypeAnnotation intType =
+ (LogicalTypeAnnotation.IntLogicalTypeAnnotation) logicalType;
+ logicalTypeParams.put("isSigned", String.valueOf(intType.isSigned()));
+ logicalTypeParams.put("bitWidth",
String.valueOf(intType.getBitWidth()));
+ }
+ }
+
+ return new ParquetColumnSpec(
+ 1, // ToDo: pass in the correct id
Review Comment:
@huaxingao if this change looks good to you, I will merge this PR. Please
take a look.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]