This is an automated email from the ASF dual-hosted git repository. uwe pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push: new 00a7a47 PARQUET-1504: Add an option to convert Int96 to Arrow Timestamp (#594) 00a7a47 is described below commit 00a7a470dbf73d6ae3bdd0774706abcda353b178 Author: Yongyan Wang <36677000+yongy...@users.noreply.github.com> AuthorDate: Sun Jan 27 12:25:53 2019 -0800 PARQUET-1504: Add an option to convert Int96 to Arrow Timestamp (#594) PARQUET-1504: Add an option to convert Parquet Int96 to Arrow Timestamp --- .../parquet/arrow/schema/SchemaConverter.java | 16 ++++++++++++++-- .../parquet/arrow/schema/TestSchemaConverter.java | 22 ++++++++++++++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java index 0bfb888..6275ca3 100644 --- a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java +++ b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java @@ -86,10 +86,19 @@ import org.apache.parquet.schema.Types.GroupBuilder; */ public class SchemaConverter { + // Indicates if Int96 should be converted to Arrow Timestamp + private final boolean convertInt96ToArrowTimestamp; + /** * For when we'll need this to be configurable */ public SchemaConverter() { + this(false); + } + + // TODO(PARQUET-1511): pass the parameters in a configuration object + public SchemaConverter(final boolean convertInt96ToArrowTimestamp) { + this.convertInt96ToArrowTimestamp = convertInt96ToArrowTimestamp; } /** @@ -492,8 +501,11 @@ public class SchemaConverter { @Override public TypeMapping convertINT96(PrimitiveTypeName primitiveTypeName) throws RuntimeException { - // Possibly timestamp - return field(new ArrowType.Binary()); + if (convertInt96ToArrowTimestamp) { + return field(new ArrowType.Timestamp(TimeUnit.NANOSECOND, null)); + } else { + return field(new ArrowType.Binary()); + } } @Override diff --git a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java index e21f36c..764621a 100644 --- a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java +++ b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java @@ -47,6 +47,7 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LE import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96; import java.io.IOException; import java.util.List; @@ -439,6 +440,27 @@ public class TestSchemaConverter { Assert.assertEquals(expected, converter.fromParquet(parquet).getArrowSchema()); } + @Test + public void testParquetInt96ToArrowBinary() { + MessageType parquet = Types.buildMessage() + .addField(Types.optional(INT96).named("a")).named("root"); + Schema expected = new Schema(asList( + field("a", new ArrowType.Binary()) + )); + Assert.assertEquals(expected, converter.fromParquet(parquet).getArrowSchema()); + } + + @Test + public void testParquetInt96ToArrowTimestamp() { + final SchemaConverter converterInt96ToTimestamp = new SchemaConverter(true); + MessageType parquet = Types.buildMessage() + .addField(Types.optional(INT96).named("a")).named("root"); + Schema expected = new Schema(asList( + field("a", new ArrowType.Timestamp(TimeUnit.NANOSECOND, null)) + )); + Assert.assertEquals(expected, converterInt96ToTimestamp.fromParquet(parquet).getArrowSchema()); + } + @Test(expected = IllegalStateException.class) public void testParquetInt64TimeMillisToArrow() { converter.fromParquet(Types.buildMessage()