This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new f3e3c91b882 HIVE-26612: INT64 Parquet timestamps cannot be read into BIGINT Hive type (Steve Carlin reviewed by Stamatis Zampetakis)
f3e3c91b882 is described below

commit f3e3c91b882c442ffd872de998f8db40f7bff162
Author: Steve Carlin <scar...@cloudera.com>
AuthorDate: Fri Oct 7 16:06:33 2022 -0700

    HIVE-26612: INT64 Parquet timestamps cannot be read into BIGINT Hive type (Steve Carlin reviewed by Stamatis Zampetakis)
    
    Although HIVE-23345 claims to have fixed the exact same problem, that's
    not true. HIVE-23345 was sufficient to fix the conversion of INT96
    Parquet timestamps to BIGINT, but not of INT64.
    
    The code in EINT64_TIMESTAMP_CONVERTER (which handles INT64 timestamps)
    for converting to BIGINT (convert(Binary)) is never called in the
    production code path.
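    
    As a minimal sketch of why that path is dead (using Parquet's real
    PrimitiveConverter class, but otherwise hypothetical scaffolding): for
    an INT64 column the reader hands the raw long to addLong, so an
    override of convert(Binary) is never reached.
    
        import org.apache.parquet.io.api.PrimitiveConverter;
    
        public class Int64AddLongSketch {
          public static void main(String[] args) {
            final long[] slot = new long[1];
            PrimitiveConverter converter = new PrimitiveConverter() {
              @Override
              public void addLong(long value) {
                // INT64 timestamps arrive here as raw epoch longs.
                slot[0] = value;
              }
            };
            // Simulates the reader delivering one INT64 value; the constant
            // is the first eventtime value in the q.out below.
            converter.addLong(1388617201000L);
            System.out.println(slot[0]); // 1388617201000
          }
        }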
    
    The EINT64_TIMESTAMP_CONVERTER always reads data from a primitive INT64
    type, so we should always use the PrimitiveConverter#addLong method.
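    
    The fix (see the ETypeConverter diff below) also bounds the value by
    the target Hive type's numeric range via getMinValue/getMaxValue. A
    hedged sketch of just that guard, with a hypothetical helper name but
    Hadoop's real LongWritable:
    
        import org.apache.hadoop.io.LongWritable;
    
        public class RangeGuardSketch {
          // NULL out anything outside [min, max]; for BIGINT the window is
          // the full long range, so every INT64 value passes through.
          static LongWritable toWritableOrNull(long value, long min, long max) {
            return (value >= min && value <= max) ? new LongWritable(value) : null;
          }
    
          public static void main(String[] args) {
            System.out.println(toWritableOrNull(1388617201000L, Long.MIN_VALUE, Long.MAX_VALUE)); // 1388617201000
            System.out.println(toWritableOrNull(300L, -128L, 127L)); // null
          }
        }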
    
    There were some tests in TestETypeConverter artificially hitting the
    convert(Binary) code path, but these are wrong since Parquet writers
    (inside or outside Hive) never write INT64 timestamps as binaries;
    they read/write longs. The tests were renamed to better reflect their
    purpose, and those targeting the INT64 timestamp type were modified to
    use long values.
    
    Closes #3651
---
 data/files/hive_26612.parquet                      | Bin 0 -> 769 bytes
 .../hive/ql/io/parquet/convert/ETypeConverter.java |  23 ++++++++++--------
 .../ql/io/parquet/convert/TestETypeConverter.java  |  10 +++-----
 .../parquet_int64_timestamp_to_bigint.q            |  22 +++++++++++++++++
 .../llap/parquet_int64_timestamp_to_bigint.q.out   |  26 +++++++++++++++++++++
 5 files changed, 64 insertions(+), 17 deletions(-)

diff --git a/data/files/hive_26612.parquet b/data/files/hive_26612.parquet
new file mode 100644
index 00000000000..9a42d906807
Binary files /dev/null and b/data/files/hive_26612.parquet differ
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
index 28207714e3c..4c3ab70958e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
@@ -749,18 +749,21 @@ public enum ETypeConverter {
         TypeInfo hiveTypeInfo) {
       if (hiveTypeInfo != null) {
        String typeName = TypeInfoUtils.getBaseName(hiveTypeInfo.getTypeName());
+        final long min = getMinValue(type, typeName, Long.MIN_VALUE);
+        final long max = getMaxValue(typeName, Long.MAX_VALUE);
+
         switch (typeName) {
-          case serdeConstants.BIGINT_TYPE_NAME:
-            return new BinaryConverter<LongWritable>(type, parent, index) {
-              @Override
-              protected LongWritable convert(Binary binary) {
-                Preconditions.checkArgument(binary.length() == 8, "Must be 8 bytes");
-                ByteBuffer buf = binary.toByteBuffer();
-                buf.order(ByteOrder.LITTLE_ENDIAN);
-                long longVal = buf.getLong();
-                return new LongWritable(longVal);
+        case serdeConstants.BIGINT_TYPE_NAME:
+          return new PrimitiveConverter() {
+            @Override
+            public void addLong(long value) {
+              if ((value >= min) && (value <= max)) {
+                parent.set(index, new LongWritable(value));
+              } else {
+                parent.set(index, null);
               }
-            };
+            }
+          };
         }
       }
       return new PrimitiveConverter() {
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java
index fcfb5c7782c..cf6444c9c04 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java
@@ -116,23 +116,19 @@ public class TestETypeConverter {
   }
 
   @Test
-  public void testGetSmallBigIntConverter() {
+  public void testGetInt64TimestampConverterBigIntHiveType() {
     Timestamp timestamp = Timestamp.valueOf("1998-10-03 09:58:31.231");
     long msTime = timestamp.toEpochMilli();
-    ByteBuffer buf = ByteBuffer.allocate(12);
-    buf.order(ByteOrder.LITTLE_ENDIAN);
-    buf.putLong(msTime);
-    buf.flip();
     // Need TimeStamp logicalType annotation here
     PrimitiveType primitiveType = createInt64TimestampType(false, TimeUnit.MILLIS);
-    Writable writable = getWritableFromBinaryConverter(createHiveTypeInfo("bigint"), primitiveType, Binary.fromByteBuffer(buf));
+    Writable writable = getWritableFromPrimitiveConverter(createHiveTypeInfo("bigint"), primitiveType, msTime);
     // Retrieve as BigInt
     LongWritable longWritable = (LongWritable) writable;
     assertEquals(msTime, longWritable.get());
   }
 
   @Test
-  public void testGetBigIntConverter() {
+  public void testGetInt96TimestampConverterBigIntHiveType() {
     Timestamp timestamp = Timestamp.valueOf("1998-10-03 09:58:31.231");
     NanoTime nanoTime = NanoTimeUtils.getNanoTime(timestamp, ZoneOffset.UTC, false);
     PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT96).named("value");
diff --git a/ql/src/test/queries/clientpositive/parquet_int64_timestamp_to_bigint.q b/ql/src/test/queries/clientpositive/parquet_int64_timestamp_to_bigint.q
new file mode 100644
index 00000000000..62b18dc3140
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_int64_timestamp_to_bigint.q
@@ -0,0 +1,22 @@
+-- The file hive_26612.parquet has the following schema:
+-- {
+--   "type" : "record",
+--   "name" : "spark_schema",
+--   "fields" : [ {
+--     "name" : "typeid",
+--     "type" : "int"
+--   }, {
+--     "name" : "eventtime",
+--     "type" : [ "null", {
+--       "type" : "long",
+--       "logicalType" : "timestamp-millis"
+--     } ],
+--     "default" : null
+--   } ]
+-- }
+-- It was created from Spark with the steps documented in HIVE-26612
+CREATE TABLE ts_as_bigint_pq (typeid int, eventtime BIGINT) STORED AS PARQUET;
+
+LOAD DATA LOCAL INPATH '../../data/files/hive_26612.parquet' into table ts_as_bigint_pq;
+
+SELECT * FROM ts_as_bigint_pq;
diff --git a/ql/src/test/results/clientpositive/llap/parquet_int64_timestamp_to_bigint.q.out b/ql/src/test/results/clientpositive/llap/parquet_int64_timestamp_to_bigint.q.out
new file mode 100644
index 00000000000..22ffd1bee8f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/parquet_int64_timestamp_to_bigint.q.out
@@ -0,0 +1,26 @@
+PREHOOK: query: CREATE TABLE ts_as_bigint_pq (typeid int, eventtime BIGINT) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ts_as_bigint_pq
+POSTHOOK: query: CREATE TABLE ts_as_bigint_pq (typeid int, eventtime BIGINT) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ts_as_bigint_pq
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/hive_26612.parquet' into table ts_as_bigint_pq
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@ts_as_bigint_pq
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/hive_26612.parquet' into table ts_as_bigint_pq
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@ts_as_bigint_pq
+PREHOOK: query: SELECT * FROM ts_as_bigint_pq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ts_as_bigint_pq
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM ts_as_bigint_pq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ts_as_bigint_pq
+#### A masked pattern was here ####
+1      1388617201000
+1      1417351232000
