This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 5519bb37fb5 HIVE-26658: INT64 Parquet timestamps cannot be mapped to 
most Hive numeric types (Stamatis Zampetakis reviewed by Chris Nauroth, Steve 
Carlin, Ayush Saxena)
5519bb37fb5 is described below

commit 5519bb37fb5189004804435f09ca9227bd6d9d6b
Author: Stamatis Zampetakis <zabe...@gmail.com>
AuthorDate: Mon Oct 24 11:50:42 2022 +0200

    HIVE-26658: INT64 Parquet timestamps cannot be mapped to most Hive numeric 
types (Stamatis Zampetakis reviewed by Chris Nauroth, Steve Carlin, Ayush 
Saxena)
    
    Closes #3698
---
 .../hive/ql/io/parquet/convert/ETypeConverter.java |  53 +++----
 .../ql/io/parquet/convert/TestETypeConverter.java  |  85 ++++++++++-
 .../parquet_int64_timestamp_to_numeric.q           |  37 +++++
 .../llap/parquet_int64_timestamp_to_numeric.q.out  | 162 +++++++++++++++++++++
 4 files changed, 295 insertions(+), 42 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
index 4c3ab70958e..91f19418356 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hadoop.io.BooleanWritable;
@@ -448,6 +449,21 @@ public enum ETypeConverter {
               }
             }
           };
+        case serdeConstants.TIMESTAMP_TYPE_NAME:
+        case serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME:
+          if (type.getLogicalTypeAnnotation() instanceof 
TimestampLogicalTypeAnnotation) {
+            TimestampLogicalTypeAnnotation logicalType =
+                (TimestampLogicalTypeAnnotation) 
type.getLogicalTypeAnnotation();
+            return new PrimitiveConverter() {
+              @Override
+              public void addLong(final long value) {
+                Timestamp timestamp =
+                    ParquetTimestampUtils.getTimestamp(value, 
logicalType.getUnit(), logicalType.isAdjustedToUTC());
+                parent.set(index, new TimestampWritableV2(timestamp));
+              }
+            };
+          }
+          throw new IllegalStateException("Cannot reliably convert INT64 value 
to timestamp without type annotation");
         default:
           return new PrimitiveConverter() {
             @Override
@@ -743,40 +759,6 @@ public enum ETypeConverter {
       };
     }
   },
-  EINT64_TIMESTAMP_CONVERTER(TimestampWritableV2.class) {
-    @Override
-    PrimitiveConverter getConverter(final PrimitiveType type, final int index, 
final ConverterParent parent,
-        TypeInfo hiveTypeInfo) {
-      if (hiveTypeInfo != null) {
-        String typeName = 
TypeInfoUtils.getBaseName(hiveTypeInfo.getTypeName());
-        final long min = getMinValue(type, typeName, Long.MIN_VALUE);
-        final long max = getMaxValue(typeName, Long.MAX_VALUE);
-
-        switch (typeName) {
-        case serdeConstants.BIGINT_TYPE_NAME:
-          return new PrimitiveConverter() {
-            @Override
-            public void addLong(long value) {
-              if ((value >= min) && (value <= max)) {
-                parent.set(index, new LongWritable(value));
-              } else {
-                parent.set(index, null);
-              }
-            }
-          };
-        }
-      }
-      return new PrimitiveConverter() {
-        @Override
-        public void addLong(final long value) {
-          TimestampLogicalTypeAnnotation logicalType = 
(TimestampLogicalTypeAnnotation) type.getLogicalTypeAnnotation();
-          Timestamp timestamp =
-              ParquetTimestampUtils.getTimestamp(value, logicalType.getUnit(), 
logicalType.isAdjustedToUTC());
-          parent.set(index, new TimestampWritableV2(timestamp));
-        }
-      };
-    }
-  },
   EDATE_CONVERTER(DateWritableV2.class) {
     @Override
     PrimitiveConverter getConverter(final PrimitiveType type, final int index, 
final ConverterParent parent, TypeInfo hiveTypeInfo) {
@@ -833,7 +815,8 @@ public enum ETypeConverter {
 
             @Override
             public Optional<PrimitiveConverter> 
visit(TimestampLogicalTypeAnnotation logicalTypeAnnotation) {
-              return Optional.of(EINT64_TIMESTAMP_CONVERTER.getConverter(type, 
index, parent, hiveTypeInfo));
+              TypeInfo info = hiveTypeInfo == null ? 
TypeInfoFactory.timestampTypeInfo : hiveTypeInfo;
+              return Optional.of(EINT64_CONVERTER.getConverter(type, index, 
parent, info));
             }
           });
 
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java
index cf6444c9c04..3173d2db900 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.io.parquet.convert;
 
+import static 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getPrimitiveTypeInfo;
 import static 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
@@ -27,6 +28,7 @@ import java.nio.ByteOrder;
 import java.time.ZoneId;
 import java.time.ZoneOffset;
 
+import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.Timestamp;
 import 
org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter.BinaryConverter;
 import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
@@ -39,6 +41,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.BytesWritable;
@@ -115,16 +118,84 @@ public class TestETypeConverter {
     assertEquals(22, (int) doubleWritable.get());
   }
 
+  @Test
+  public void testGetInt64TimestampConverterTinyIntHiveType() {
+    testGetInt64TimestampConverterNumericHiveType("1970-01-01 00:00:00.005", 
"tinyint", 5);
+  }
+
+  @Test
+  public void testGetInt64TimestampConverterSmallIntHiveType() {
+    testGetInt64TimestampConverterNumericHiveType("1970-01-01 00:00:00.005", 
"smallint", 5);
+  }
+
+  @Test
+  public void testGetInt64TimestampConverterIntHiveType() {
+    testGetInt64TimestampConverterNumericHiveType("1970-01-01 00:00:00.005", 
"int", 5);
+  }
+
   @Test
   public void testGetInt64TimestampConverterBigIntHiveType() {
-    Timestamp timestamp = Timestamp.valueOf("1998-10-03 09:58:31.231");
-    long msTime = timestamp.toEpochMilli();
-    // Need TimeStamp logicalType annotation here
+    testGetInt64TimestampConverterNumericHiveType("1998-10-03 09:58:31.231", 
"bigint", 907408711231L);
+  }
+
+  @Test
+  public void testGetInt64TimestampConverterFloatHiveType() {
+    testGetInt64TimestampConverterNumericHiveType("1970-01-01 00:00:00.005", 
"float", 5.0f);
+  }
+
+  @Test
+  public void testGetInt64TimestampConverterDoubleHiveType() {
+    testGetInt64TimestampConverterNumericHiveType("1970-01-01 00:00:00.005", 
"double", 5.0d);
+  }
+
+  @Test
+  public void testGetInt64TimestampConverterDecimalHiveType() {
+    testGetInt64TimestampConverterNumericHiveType("1970-01-01 00:00:00.005", 
"decimal(1,0)", HiveDecimal.create(5));
+  }
+
+  @Test
+  public void testGetInt64TimestampConverterNoHiveType() {
+    Timestamp ts = Timestamp.valueOf("2022-10-24 11:35:00.005");
     PrimitiveType primitiveType = createInt64TimestampType(false, 
TimeUnit.MILLIS);
-    Writable writable = 
getWritableFromPrimitiveConverter(createHiveTypeInfo("bigint"), primitiveType, 
msTime);
-    // Retrieve as BigInt
-    LongWritable longWritable = (LongWritable) writable;
-    assertEquals(msTime, longWritable.get());
+    Writable writable = getWritableFromPrimitiveConverter(null, primitiveType, 
ts.toEpochMilli());
+    assertEquals("2022-10-24 11:35:00.005", ((TimestampWritableV2) 
writable).getTimestamp().toString());
+  }
+
+  @Test(expected = IllegalStateException.class)
+  public void testGetInt64NoLogicalAnnotationTimestampHiveType() {
+    Timestamp ts = Timestamp.valueOf("2022-10-24 11:43:00.005");
+    PrimitiveType primitiveType = 
Types.optional(PrimitiveTypeName.INT64).named("int64");
+    getWritableFromPrimitiveConverter(TypeInfoFactory.timestampTypeInfo, 
primitiveType, ts.toEpochMilli());
+  }
+
+  private void testGetInt64TimestampConverterNumericHiveType(String timestamp, 
String type, Object expected) {
+    Timestamp ts = Timestamp.valueOf(timestamp);
+    PrimitiveType primitiveType = createInt64TimestampType(false, 
TimeUnit.MILLIS);
+    PrimitiveTypeInfo info = getPrimitiveTypeInfo(type);
+    Writable writable = getWritableFromPrimitiveConverter(info, primitiveType, 
ts.toEpochMilli());
+    final Object actual;
+    switch (info.getPrimitiveCategory()) {
+    case BYTE:
+    case SHORT:
+    case INT:
+      actual = ((IntWritable) writable).get();
+      break;
+    case LONG:
+      actual = ((LongWritable) writable).get();
+      break;
+    case FLOAT:
+      actual = ((FloatWritable) writable).get();
+      break;
+    case DOUBLE:
+      actual = ((DoubleWritable) writable).get();
+      break;
+    case DECIMAL:
+      actual = ((HiveDecimalWritable) writable).getHiveDecimal();
+      break;
+    default:
+      throw new IllegalStateException(info.toString());
+    }
+    assertEquals(expected, actual);
   }
 
   @Test
diff --git 
a/ql/src/test/queries/clientpositive/parquet_int64_timestamp_to_numeric.q 
b/ql/src/test/queries/clientpositive/parquet_int64_timestamp_to_numeric.q
new file mode 100644
index 00000000000..24b3336028d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_int64_timestamp_to_numeric.q
@@ -0,0 +1,37 @@
+set hive.parquet.write.int64.timestamp=true;
+set hive.parquet.timestamp.time.unit=micros;
+CREATE TABLE hive_26658_table (ts TIMESTAMP) STORED AS PARQUET;
+
+INSERT INTO hive_26658_table VALUES ('2022-10-21 15:58:32');
+INSERT INTO hive_26658_table VALUES ('1970-01-01 00:00:00.000009');
+
+SELECT * FROM hive_26658_table;
+
+set metastore.disallow.incompatible.col.type.changes=false;
+ALTER TABLE hive_26658_table CHANGE ts ts TINYINT;
+
+SELECT * FROM hive_26658_table;
+
+ALTER TABLE hive_26658_table CHANGE ts ts SMALLINT;
+
+SELECT * FROM hive_26658_table;
+
+ALTER TABLE hive_26658_table CHANGE ts ts INT;
+
+SELECT * FROM hive_26658_table;
+
+ALTER TABLE hive_26658_table CHANGE ts ts BIGINT;
+
+SELECT * FROM hive_26658_table;
+
+ALTER TABLE hive_26658_table CHANGE ts ts DOUBLE;
+
+SELECT * FROM hive_26658_table;
+
+ALTER TABLE hive_26658_table CHANGE ts ts FLOAT;
+
+SELECT * FROM hive_26658_table;
+
+ALTER TABLE hive_26658_table CHANGE ts ts Decimal;
+
+SELECT * FROM hive_26658_table;
diff --git 
a/ql/src/test/results/clientpositive/llap/parquet_int64_timestamp_to_numeric.q.out
 
b/ql/src/test/results/clientpositive/llap/parquet_int64_timestamp_to_numeric.q.out
new file mode 100644
index 00000000000..dc2be4032a4
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/llap/parquet_int64_timestamp_to_numeric.q.out
@@ -0,0 +1,162 @@
+PREHOOK: query: CREATE TABLE hive_26658_table (ts TIMESTAMP) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: CREATE TABLE hive_26658_table (ts TIMESTAMP) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hive_26658_table
+PREHOOK: query: INSERT INTO hive_26658_table VALUES ('2022-10-21 15:58:32')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: INSERT INTO hive_26658_table VALUES ('2022-10-21 15:58:32')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@hive_26658_table
+POSTHOOK: Lineage: hive_26658_table.ts SCRIPT []
+PREHOOK: query: INSERT INTO hive_26658_table VALUES ('1970-01-01 
00:00:00.000009')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: INSERT INTO hive_26658_table VALUES ('1970-01-01 
00:00:00.000009')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@hive_26658_table
+POSTHOOK: Lineage: hive_26658_table.ts SCRIPT []
+PREHOOK: query: SELECT * FROM hive_26658_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM hive_26658_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+2022-10-21 15:58:32
+1970-01-01 00:00:00.000009
+PREHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts TINYINT
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@hive_26658_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts TINYINT
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@hive_26658_table
+POSTHOOK: Output: default@hive_26658_table
+PREHOOK: query: SELECT * FROM hive_26658_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM hive_26658_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+NULL
+9
+PREHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts SMALLINT
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@hive_26658_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts SMALLINT
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@hive_26658_table
+POSTHOOK: Output: default@hive_26658_table
+PREHOOK: query: SELECT * FROM hive_26658_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM hive_26658_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+NULL
+9
+PREHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts INT
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@hive_26658_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts INT
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@hive_26658_table
+POSTHOOK: Output: default@hive_26658_table
+PREHOOK: query: SELECT * FROM hive_26658_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM hive_26658_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+NULL
+9
+PREHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts BIGINT
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@hive_26658_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts BIGINT
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@hive_26658_table
+POSTHOOK: Output: default@hive_26658_table
+PREHOOK: query: SELECT * FROM hive_26658_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM hive_26658_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+1666367912000000
+9
+PREHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts DOUBLE
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@hive_26658_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts DOUBLE
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@hive_26658_table
+POSTHOOK: Output: default@hive_26658_table
+PREHOOK: query: SELECT * FROM hive_26658_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM hive_26658_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+1.666367912E15
+9.0
+PREHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts FLOAT
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@hive_26658_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts FLOAT
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@hive_26658_table
+POSTHOOK: Output: default@hive_26658_table
+PREHOOK: query: SELECT * FROM hive_26658_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM hive_26658_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+1.66636785E15
+9.0
+PREHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts Decimal
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@hive_26658_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts Decimal
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@hive_26658_table
+POSTHOOK: Output: default@hive_26658_table
+PREHOOK: query: SELECT * FROM hive_26658_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM hive_26658_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+NULL
+9

Reply via email to