This is an automated email from the ASF dual-hosted git repository. zivanfi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push: new 55e9497 PARQUET-1371: Time/Timestamp UTC normalization parameter doesn't work (#511) 55e9497 is described below commit 55e94974e0547085a66c6242336e56230f996d52 Author: nandorKollar <nandorkol...@users.noreply.github.com> AuthorDate: Tue Aug 7 17:56:42 2018 +0200 PARQUET-1371: Time/Timestamp UTC normalization parameter doesn't work (#511) --- .../format/converter/ParquetMetadataConverter.java | 15 +++--- .../converter/TestParquetMetadataConverter.java | 61 +++++++++++++++++++++- 2 files changed, 68 insertions(+), 8 deletions(-) diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index d222505..1442910 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -799,7 +799,7 @@ public class ParquetMetadataConverter { } // Visible for testing - LogicalTypeAnnotation getOriginalType(ConvertedType type, SchemaElement schemaElement) { + LogicalTypeAnnotation getLogicalTypeAnnotation(ConvertedType type, SchemaElement schemaElement) { switch (type) { case UTF8: return LogicalTypeAnnotation.stringType(); @@ -852,7 +852,7 @@ public class ParquetMetadataConverter { } } - LogicalTypeAnnotation getOriginalType(LogicalType type) { + LogicalTypeAnnotation getLogicalTypeAnnotation(LogicalType type) { switch (type.getSetField()) { case MAP: return LogicalTypeAnnotation.mapType(); @@ -1194,12 +1194,15 @@ public class ParquetMetadataConverter { } if (schemaElement.isSetLogicalType()) { - childBuilder.as(getOriginalType(schemaElement.logicalType)); + childBuilder.as(getLogicalTypeAnnotation(schemaElement.logicalType)); } if (schemaElement.isSetConverted_type()) { - LogicalTypeAnnotation originalType = getOriginalType(schemaElement.converted_type, schemaElement); - LogicalTypeAnnotation newLogicalType = schemaElement.isSetLogicalType() ? getOriginalType(schemaElement.logicalType) : null; - if (!originalType.equals(newLogicalType)) { + OriginalType originalType = getLogicalTypeAnnotation(schemaElement.converted_type, schemaElement).toOriginalType(); + OriginalType newOriginalType = (schemaElement.isSetLogicalType() && getLogicalTypeAnnotation(schemaElement.logicalType) != null) ? + getLogicalTypeAnnotation(schemaElement.logicalType).toOriginalType() : null; + if (!originalType.equals(newOriginalType)) { + LOG.warn("Converted type and logical type metadata mismatch (convertedType: {}, logical type: {}). Using value in converted type.", + schemaElement.converted_type, schemaElement.logicalType); childBuilder.as(originalType); } } diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java index 1474525..d1a3a3c 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java @@ -20,6 +20,8 @@ package org.apache.parquet.format.converter; import static java.util.Collections.emptyList; import static org.apache.parquet.format.converter.ParquetMetadataConverter.filterFileMetaDataByStart; +import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType; import static org.apache.parquet.schema.MessageTypeParser.parseMessageType; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -163,6 +165,61 @@ public class TestParquetMetadataConverter { } @Test + public void testIncompatibleLogicalAndConvertedTypes() { + ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); + MessageType schema = Types.buildMessage() + .required(PrimitiveTypeName.BINARY) + .as(OriginalType.DECIMAL).precision(9).scale(2) + .named("aBinary") + .named("Message"); + MessageType expected = Types.buildMessage() + .required(PrimitiveTypeName.BINARY) + .as(LogicalTypeAnnotation.jsonType()) + .named("aBinary") + .named("Message"); + + List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(schema); + // Set converted type field to a different type to verify that in case of mismatch, it overrides logical type + parquetSchema.get(1).setConverted_type(ConvertedType.JSON); + MessageType actual = parquetMetadataConverter.fromParquetSchema(parquetSchema, null); + assertEquals(expected, actual); + } + + @Test + public void testTimeLogicalTypes() { + ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); + MessageType expected = Types.buildMessage() + .required(PrimitiveTypeName.INT64) + .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MILLIS)) + .named("aTimestampNonUtcMillis") + .required(PrimitiveTypeName.INT64) + .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS)) + .named("aTimestampUtcMillis") + .required(PrimitiveTypeName.INT64) + .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MICROS)) + .named("aTimestampNonUtcMicros") + .required(PrimitiveTypeName.INT64) + .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS)) + .named("aTimestampUtcMicros") + .required(PrimitiveTypeName.INT32) + .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MILLIS)) + .named("aTimeNonUtcMillis") + .required(PrimitiveTypeName.INT32) + .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS)) + .named("aTimeUtcMillis") + .required(PrimitiveTypeName.INT64) + .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MICROS)) + .named("aTimeNonUtcMicros") + .required(PrimitiveTypeName.INT64) + .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS)) + .named("aTimeUtcMicros") + .named("Message"); + List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(expected); + MessageType schema = parquetMetadataConverter.fromParquetSchema(parquetSchema, null); + assertEquals(expected, schema); + } + + @Test public void testEnumEquivalence() { ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter(); for (org.apache.parquet.column.Encoding encoding : org.apache.parquet.column.Encoding.values()) { @@ -184,11 +241,11 @@ public class TestParquetMetadataConverter { assertEquals(type, parquetMetadataConverter.getType(parquetMetadataConverter.getPrimitive(type))); } for (OriginalType original : OriginalType.values()) { - assertEquals(original, parquetMetadataConverter.getOriginalType( + assertEquals(original, parquetMetadataConverter.getLogicalTypeAnnotation( parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.fromOriginalType(original, null)), null).toOriginalType()); } for (ConvertedType converted : ConvertedType.values()) { - assertEquals(converted, parquetMetadataConverter.convertToConvertedType(parquetMetadataConverter.getOriginalType(converted, null))); + assertEquals(converted, parquetMetadataConverter.convertToConvertedType(parquetMetadataConverter.getLogicalTypeAnnotation(converted, null))); } }