This is an automated email from the ASF dual-hosted git repository. thw pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git
The following commit(s) were added to refs/heads/master by this push: new 548e4b5188b [FLINK-25565][Formats][Parquet] timestamp int64 option tidy up (#23900) 548e4b5188b is described below commit 548e4b5188bb3f092206182d779a909756408660 Author: Thomas Weise <twe...@users.noreply.github.com> AuthorDate: Mon Dec 11 08:05:53 2023 -0500 [FLINK-25565][Formats][Parquet] timestamp int64 option tidy up (#23900) --- docs/content/docs/connectors/table/formats/parquet.md | 5 +++-- .../flink/formats/parquet/vector/reader/TimestampColumnReader.java | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/content/docs/connectors/table/formats/parquet.md b/docs/content/docs/connectors/table/formats/parquet.md index 75c524f238f..0e53f4b919e 100644 --- a/docs/content/docs/connectors/table/formats/parquet.md +++ b/docs/content/docs/connectors/table/formats/parquet.md @@ -107,9 +107,10 @@ For example, you can configure `parquet.compression=GZIP` to enable gzip compres Data Type Mapping ---------------- -Currently, Parquet format type mapping is compatible with Apache Hive, but different with Apache Spark: +Currently, Parquet format type mapping is compatible with Apache Hive, but by default not with Apache Spark: - Timestamp: mapping timestamp type to int96 whatever the precision is. +- Spark compatibility requires int64 via config option `timestamp.time.unit` (see above). - Decimal: mapping decimal type to fixed length byte array according to the precision. The following table lists the type mapping from Flink type to Parquet type. @@ -185,7 +186,7 @@ The following table lists the type mapping from Flink type to Parquet type. </tr> <tr> <td>TIMESTAMP</td> - <td>INT96</td> + <td>INT96 (or INT64)</td> <td></td> </tr> <tr> diff --git a/flink-formats/flink-parquet/src/main/java/org/apache/flink/formats/parquet/vector/reader/TimestampColumnReader.java b/flink-formats/flink-parquet/src/main/java/org/apache/flink/formats/parquet/vector/reader/TimestampColumnReader.java index aa544f4e91c..7a36edc573b 100644 --- a/flink-formats/flink-parquet/src/main/java/org/apache/flink/formats/parquet/vector/reader/TimestampColumnReader.java +++ b/flink-formats/flink-parquet/src/main/java/org/apache/flink/formats/parquet/vector/reader/TimestampColumnReader.java @@ -159,8 +159,8 @@ public class TimestampColumnReader extends AbstractColumnReader<WritableTimestam public static TimestampData int64ToTimestamp( boolean utcTimestamp, long value, LogicalTypeAnnotation.TimeUnit timeUnit) { - long nanosOfMillisecond = 0L; - long milliseconds = 0L; + final long nanosOfMillisecond; + final long milliseconds; switch (timeUnit) { case MILLIS: @@ -176,7 +176,7 @@ public class TimestampColumnReader extends AbstractColumnReader<WritableTimestam nanosOfMillisecond = value % NANOS_PER_SECOND; break; default: - break; + throw new IllegalArgumentException("Invalid time unit: " + timeUnit); } if (utcTimestamp) {