This is an automated email from the ASF dual-hosted git repository. zabetak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 97d7630bca1 HIVE-26373: ClassCastException when reading timestamps from HBase table with Avro data (Soumyakanti Das reviewed by Stamatis Zampetakis) 97d7630bca1 is described below commit 97d7630bca10e96229519ab397f5cf122e5622e3 Author: Soumyakanti Das <soumyakanti....@cloudera.com> AuthorDate: Tue Jul 5 15:32:53 2022 -0700 HIVE-26373: ClassCastException when reading timestamps from HBase table with Avro data (Soumyakanti Das reviewed by Stamatis Zampetakis) Closes #3418 --- data/files/nested_ts.avsc | 27 ++++++++++++ .../queries/positive/hbase_avro_nested_timestamp.q | 22 ++++++++++ .../positive/hbase_avro_nested_timestamp.q.out | 45 +++++++++++++++++++ .../apache/hadoop/hive/hbase/HBaseTestSetup.java | 51 ++++++++++++++++++++++ .../hive/serde2/avro/AvroLazyObjectInspector.java | 3 +- 5 files changed, 147 insertions(+), 1 deletion(-) diff --git a/data/files/nested_ts.avsc b/data/files/nested_ts.avsc new file mode 100644 index 00000000000..eac0ad29475 --- /dev/null +++ b/data/files/nested_ts.avsc @@ -0,0 +1,27 @@ +{ + "type": "record", + "name": "TableRecord", + "namespace": "org.apache.hive", + "fields": [ + { + "name": "id", + "type": "string" + }, + { + "name": "dischargedate", + "type": { + "name": "DateRecord", + "type": "record", + "fields": [ + { + "name": "value", + "type": { + "type": "long", + "logicalType": "timestamp-millis" + } + } + ] + } + } + ] +} diff --git a/hbase-handler/src/test/queries/positive/hbase_avro_nested_timestamp.q b/hbase-handler/src/test/queries/positive/hbase_avro_nested_timestamp.q new file mode 100644 index 00000000000..5f3a22cc51a --- /dev/null +++ b/hbase-handler/src/test/queries/positive/hbase_avro_nested_timestamp.q @@ -0,0 +1,22 @@ +dfs -cp ${system:hive.root}data/files/nested_ts.avsc ${system:test.tmp.dir}/nested_ts.avsc; + +CREATE EXTERNAL TABLE hbase_avro_table( +`key` string COMMENT '', +`data_frv4` struct<`id`:string, `dischargedate`:struct<`value`:timestamp>>) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.hbase.HBaseSerDe' +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ( +'serialization.format'='1', +'hbase.columns.mapping' = ':key,data:frV4', +'data.frV4.serialization.type'='avro', +'data.frV4.avro.schema.url'='${system:test.tmp.dir}/nested_ts.avsc' +) +TBLPROPERTIES ( +'hbase.table.name' = 'HiveAvroTable', +'hbase.struct.autogenerate'='true'); + +set hive.vectorized.execution.enabled=false; +set hive.fetch.task.conversion=none; + +select data_frV4.dischargedate.value from hbase_avro_table; diff --git a/hbase-handler/src/test/results/positive/hbase_avro_nested_timestamp.q.out b/hbase-handler/src/test/results/positive/hbase_avro_nested_timestamp.q.out new file mode 100644 index 00000000000..6f08b83e3cf --- /dev/null +++ b/hbase-handler/src/test/results/positive/hbase_avro_nested_timestamp.q.out @@ -0,0 +1,45 @@ +PREHOOK: query: CREATE EXTERNAL TABLE hbase_avro_table( +`key` string COMMENT '', +`data_frv4` struct<`id`:string, `dischargedate`:struct<`value`:timestamp>>) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.hbase.HBaseSerDe' +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ( +'serialization.format'='1', +'hbase.columns.mapping' = ':key,data:frV4', +'data.frV4.serialization.type'='avro', +#### A masked pattern was here #### +) +TBLPROPERTIES ( +'hbase.table.name' = 'HiveAvroTable', +'hbase.struct.autogenerate'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hbase_avro_table +POSTHOOK: query: CREATE EXTERNAL TABLE hbase_avro_table( +`key` string COMMENT '', +`data_frv4` struct<`id`:string, `dischargedate`:struct<`value`:timestamp>>) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.hbase.HBaseSerDe' +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ( +'serialization.format'='1', +'hbase.columns.mapping' = ':key,data:frV4', +'data.frV4.serialization.type'='avro', +#### A masked pattern was here #### +) +TBLPROPERTIES ( +'hbase.table.name' = 'HiveAvroTable', +'hbase.struct.autogenerate'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hbase_avro_table +PREHOOK: query: select data_frV4.dischargedate.value from hbase_avro_table +PREHOOK: type: QUERY +PREHOOK: Input: default@hbase_avro_table +#### A masked pattern was here #### +POSTHOOK: query: select data_frV4.dischargedate.value from hbase_avro_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hbase_avro_table +#### A masked pattern was here #### +2022-07-05 00:00:00 diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java b/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java index 8baf1464b9a..202420854df 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/hbase/HBaseTestSetup.java @@ -18,9 +18,21 @@ package org.apache.hadoop.hive.hbase; +import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; +import java.nio.file.Paths; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.ZoneId; import java.util.Arrays; +import org.apache.avro.Schema; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; + import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -29,6 +41,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Admin; @@ -107,6 +120,7 @@ public class HBaseTestSetup extends QTestSetup { if (meta != null) meta.close(); } createHBaseTable(); + createAvroTable(); } private void createHBaseTable() throws IOException { @@ -158,6 +172,43 @@ public class HBaseTestSetup extends QTestSetup { } } + private static byte[] createAvroRecordWithNestedTimestamp() throws IOException { + File schemaFile = Paths.get(System.getProperty("test.data.dir"), "nested_ts.avsc").toFile(); + Schema schema = new Schema.Parser().parse(schemaFile); + GenericData.Record rootRecord = new GenericData.Record(schema); + rootRecord.put("id", "X338092"); + GenericData.Record dateRecord = new GenericData.Record(schema.getField("dischargedate").schema()); + final LocalDateTime _2022_07_05 = LocalDate.of(2022, 7, 5).atStartOfDay(); + // Store in UTC as required per Avro specification and as done by Hive in other parts of the system + dateRecord.put("value", _2022_07_05.atZone(ZoneId.systemDefault()).toInstant().toEpochMilli()); + rootRecord.put("dischargedate", dateRecord); + + try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { + try (DataFileWriter<GenericRecord> dataFileWriter + = new DataFileWriter<GenericRecord>(new GenericDatumWriter<>(schema))) { + dataFileWriter.create(schema, out); + dataFileWriter.append(rootRecord); + } + return out.toByteArray(); + } + } + + private void createAvroTable() throws IOException { + final TableName hbaseTable = TableName.valueOf("HiveAvroTable"); + HTableDescriptor htableDesc = new HTableDescriptor(hbaseTable); + htableDesc.addFamily(new HColumnDescriptor("data".getBytes())); + + try (Admin hbaseAdmin = hbaseConn.getAdmin()) { + hbaseAdmin.createTable(htableDesc); + try (Table table = hbaseConn.getTable(hbaseTable)) { + Put p = new Put("1".getBytes()); + p.add(new KeyValue("1".getBytes(), "data".getBytes(), "frV4".getBytes(), + createAvroRecordWithNestedTimestamp())); + table.put(p); + } + } + } + @Override public void tearDown() throws Exception { if (hbaseCluster != null) { diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java index 5a857f2be65..d0956bde549 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.serde2.avro; import java.io.ByteArrayInputStream; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -498,6 +499,6 @@ public class AvroLazyObjectInspector extends LazySimpleStructObjectInspector { * */ private boolean isPrimitive(Class<?> clazz) { return clazz.isPrimitive() || ClassUtils.wrapperToPrimitive(clazz) != null - || clazz.getSimpleName().equals("String"); + || Arrays.asList("String", "Timestamp").contains(clazz.getSimpleName()); } }