This is an automated email from the ASF dual-hosted git repository.

zivanfi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 55e9497  PARQUET-1371: Time/Timestamp UTC normalization parameter doesn't work (#511)
55e9497 is described below

commit 55e94974e0547085a66c6242336e56230f996d52
Author: nandorKollar <nandorkol...@users.noreply.github.com>
AuthorDate: Tue Aug 7 17:56:42 2018 +0200

    PARQUET-1371: Time/Timestamp UTC normalization parameter doesn't work (#511)
---
 .../format/converter/ParquetMetadataConverter.java | 15 +++---
 .../converter/TestParquetMetadataConverter.java    | 61 +++++++++++++++++++++-
 2 files changed, 68 insertions(+), 8 deletions(-)

diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index d222505..1442910 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -799,7 +799,7 @@ public class ParquetMetadataConverter {
   }
 
   // Visible for testing
-  LogicalTypeAnnotation getOriginalType(ConvertedType type, SchemaElement schemaElement) {
+  LogicalTypeAnnotation getLogicalTypeAnnotation(ConvertedType type, SchemaElement schemaElement) {
     switch (type) {
       case UTF8:
         return LogicalTypeAnnotation.stringType();
@@ -852,7 +852,7 @@ public class ParquetMetadataConverter {
     }
   }
 
-  LogicalTypeAnnotation getOriginalType(LogicalType type) {
+  LogicalTypeAnnotation getLogicalTypeAnnotation(LogicalType type) {
     switch (type.getSetField()) {
       case MAP:
         return LogicalTypeAnnotation.mapType();
@@ -1194,12 +1194,15 @@ public class ParquetMetadataConverter {
       }
 
       if (schemaElement.isSetLogicalType()) {
-        childBuilder.as(getOriginalType(schemaElement.logicalType));
+        childBuilder.as(getLogicalTypeAnnotation(schemaElement.logicalType));
       }
       if (schemaElement.isSetConverted_type()) {
-        LogicalTypeAnnotation originalType = getOriginalType(schemaElement.converted_type, schemaElement);
-        LogicalTypeAnnotation newLogicalType = schemaElement.isSetLogicalType() ? getOriginalType(schemaElement.logicalType) : null;
-        if (!originalType.equals(newLogicalType)) {
+        OriginalType originalType = getLogicalTypeAnnotation(schemaElement.converted_type, schemaElement).toOriginalType();
+        OriginalType newOriginalType = (schemaElement.isSetLogicalType() && getLogicalTypeAnnotation(schemaElement.logicalType) != null) ?
+           getLogicalTypeAnnotation(schemaElement.logicalType).toOriginalType() : null;
+        if (!originalType.equals(newOriginalType)) {
+          LOG.warn("Converted type and logical type metadata mismatch (convertedType: {}, logical type: {}). Using value in converted type.",
+            schemaElement.converted_type, schemaElement.logicalType);
           childBuilder.as(originalType);
         }
       }
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index 1474525..d1a3a3c 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -20,6 +20,8 @@ package org.apache.parquet.format.converter;
 
 import static java.util.Collections.emptyList;
 import static org.apache.parquet.format.converter.ParquetMetadataConverter.filterFileMetaDataByStart;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
 import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -163,6 +165,61 @@ public class TestParquetMetadataConverter {
   }
 
   @Test
+  public void testIncompatibleLogicalAndConvertedTypes() {
+    ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
+    MessageType schema = Types.buildMessage()
+      .required(PrimitiveTypeName.BINARY)
+      .as(OriginalType.DECIMAL).precision(9).scale(2)
+      .named("aBinary")
+      .named("Message");
+    MessageType expected = Types.buildMessage()
+      .required(PrimitiveTypeName.BINARY)
+      .as(LogicalTypeAnnotation.jsonType())
+      .named("aBinary")
+      .named("Message");
+
+    List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(schema);
+    // Set converted type field to a different type to verify that in case of mismatch, it overrides logical type
+    parquetSchema.get(1).setConverted_type(ConvertedType.JSON);
+    MessageType actual = parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
+    assertEquals(expected, actual);
+  }
+
+  @Test
+  public void testTimeLogicalTypes() {
+    ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
+    MessageType expected = Types.buildMessage()
+      .required(PrimitiveTypeName.INT64)
+      .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
+      .named("aTimestampNonUtcMillis")
+      .required(PrimitiveTypeName.INT64)
+      .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
+      .named("aTimestampUtcMillis")
+      .required(PrimitiveTypeName.INT64)
+      .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.MICROS))
+      .named("aTimestampNonUtcMicros")
+      .required(PrimitiveTypeName.INT64)
+      .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
+      .named("aTimestampUtcMicros")
+      .required(PrimitiveTypeName.INT32)
+      .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
+      .named("aTimeNonUtcMillis")
+      .required(PrimitiveTypeName.INT32)
+      .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
+      .named("aTimeUtcMillis")
+      .required(PrimitiveTypeName.INT64)
+      .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MICROS))
+      .named("aTimeNonUtcMicros")
+      .required(PrimitiveTypeName.INT64)
+      .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
+      .named("aTimeUtcMicros")
+      .named("Message");
+    List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(expected);
+    MessageType schema = parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
+    assertEquals(expected, schema);
+  }
+
+  @Test
   public void testEnumEquivalence() {
    ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
    for (org.apache.parquet.column.Encoding encoding : org.apache.parquet.column.Encoding.values()) {
@@ -184,11 +241,11 @@ public class TestParquetMetadataConverter {
      assertEquals(type, parquetMetadataConverter.getType(parquetMetadataConverter.getPrimitive(type)));
     }
     for (OriginalType original : OriginalType.values()) {
-      assertEquals(original, parquetMetadataConverter.getOriginalType(
+      assertEquals(original, parquetMetadataConverter.getLogicalTypeAnnotation(
        parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.fromOriginalType(original, null)), null).toOriginalType());
     }
     for (ConvertedType converted : ConvertedType.values()) {
-      assertEquals(converted, parquetMetadataConverter.convertToConvertedType(parquetMetadataConverter.getOriginalType(converted, null)));
+      assertEquals(converted, parquetMetadataConverter.convertToConvertedType(parquetMetadataConverter.getLogicalTypeAnnotation(converted, null)));
     }
   }
 

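Below is a minimal illustrative sketch (not part of the commit) of the round trip that the new testTimeLogicalTypes test exercises: build a schema whose timestamp column is flagged as UTC-normalized, convert it to the Thrift SchemaElement representation and back, and check that the flag survives. It uses only classes and methods that appear in the diff above; the class name is hypothetical, and the package declaration is assumed so that the converter methods (which the test also calls from that package) remain accessible.

    package org.apache.parquet.format.converter;

    import java.util.List;

    import org.apache.parquet.format.SchemaElement;
    import org.apache.parquet.schema.LogicalTypeAnnotation;
    import org.apache.parquet.schema.MessageType;
    import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
    import org.apache.parquet.schema.Types;

    public class TimestampUtcRoundTripSketch {
      public static void main(String[] args) {
        // Schema with a single INT64 column annotated as a UTC-normalized
        // timestamp in milliseconds (isAdjustedToUTC = true).
        MessageType schema = Types.buildMessage()
          .required(PrimitiveTypeName.INT64)
          .as(LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
          .named("aTimestampUtcMillis")
          .named("Message");

        // Convert to the Thrift SchemaElement list and back. With this fix the
        // isAdjustedToUTC flag survives the round trip instead of being replaced
        // by the UTC-agnostic converted type.
        ParquetMetadataConverter converter = new ParquetMetadataConverter();
        List<SchemaElement> parquetSchema = converter.toParquetSchema(schema);
        MessageType roundTripped = converter.fromParquetSchema(parquetSchema, null);

        System.out.println(schema.equals(roundTripped)); // expected: true
      }
    }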