[ https://issues.apache.org/jira/browse/HIVE-26373?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Stamatis Zampetakis updated HIVE-26373: --------------------------------------- Description: Consider an HBase table (e.g., HiveAvroTable) that has a column with Avro data, with timestamps nested under complex/struct types. {code:sql} CREATE EXTERNAL TABLE hbase_avro_table( `key` string COMMENT '', `data_frv4` struct<`id`:string, `dischargedate`:struct<`value`:timestamp>>) ROW FORMAT SERDE 'org.apache.hadoop.hive.hbase.HBaseSerDe' STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ( 'serialization.format'='1', 'hbase.columns.mapping' = ':key,data:frV4', 'data.frV4.serialization.type'='avro', 'data.frV4.avro.schema.url'='path/to/avro/schema/for/column/filename.avsc' ) TBLPROPERTIES ( 'hbase.table.name' = 'HiveAvroTable', 'hbase.struct.autogenerate'='true'); {code} Any attempt to read the timestamp value from the nested struct leads to a {{ClassCastException}}. {code:sql} select data_frV4.dischargedate.value from hbase_avro_table; {code} Below you can find the stack trace for the previous query: {noformat} 2022-07-05T08:40:51,572 ERROR [LocalJobRunner Map Task Executor #0] mr.ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:573) at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:148) at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54) at org.apache.hadoop.hive.ql.exec.mr.ExecMapRunner.run(ExecMapRunner.java:37) at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:465) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:349) at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:271) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: java.lang.ClassCastException: org.apache.hadoop.hive.common.type.Timestamp cannot be cast to org.apache.hadoop.hive.serde2.lazy.LazyPrimitive at org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector.getPrimitiveWritableObject(AbstractPrimitiveLazyObjectInspector.java:40) at org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyTimestampObjectInspector.getPrimitiveWritableObject(LazyTimestampObjectInspector.java:29) at org.apache.hadoop.hive.serde2.lazy.LazyUtils.writePrimitiveUTF8(LazyUtils.java:308) at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:292) at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serializeField(LazySimpleSerDe.java:247) at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.doSerialize(LazySimpleSerDe.java:231) at org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe.serialize(AbstractEncodingAwareSerDe.java:55) at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:1059) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:937) at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:937) at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:128) at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:152) at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:552) ... 
11 more {noformat} The problem starts in the {{toLazyObject}} method of {*}AvroLazyObjectInspector.java{*}, when [this|https://github.com/apache/hive/blob/53009126f6fe7ccf24cf052fd6c156542f38b19d/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java#L347] condition returns false for {*}Timestamp{*}, preventing the conversion of *Timestamp* to *LazyTimestamp* [here|https://github.com/apache/hive/blob/53009126f6fe7ccf24cf052fd6c156542f38b19d/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java#L132]. The solution is to return {{true}} for Timestamps in the {{isPrimitive}} method. was: For Avro data where the schema has nested struct with a Timestamp datatype, we get the following ClassCastException: {code:java} 2022-07-05T08:40:51,572 ERROR [LocalJobRunner Map Task Executor #0] mr.ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:573) at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:148) at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54) at org.apache.hadoop.hive.ql.exec.mr.ExecMapRunner.run(ExecMapRunner.java:37) at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:465) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:349) at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:271) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: java.lang.ClassCastException: org.apache.hadoop.hive.common.type.Timestamp cannot be cast to org.apache.hadoop.hive.serde2.lazy.LazyPrimitive at 
org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector.getPrimitiveWritableObject(AbstractPrimitiveLazyObjectInspector.java:40) at org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyTimestampObjectInspector.getPrimitiveWritableObject(LazyTimestampObjectInspector.java:29) at org.apache.hadoop.hive.serde2.lazy.LazyUtils.writePrimitiveUTF8(LazyUtils.java:308) at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:292) at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serializeField(LazySimpleSerDe.java:247) at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.doSerialize(LazySimpleSerDe.java:231) at org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe.serialize(AbstractEncodingAwareSerDe.java:55) at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:1059) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:937) at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:937) at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:128) at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:152) at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:552) ... 11 more {code} The problem starts in {{toLazyObject}} method of {*}AvroLazyObjectInspector.java{*}, when [this|https://github.com/apache/hive/blob/53009126f6fe7ccf24cf052fd6c156542f38b19d/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java#L347] condition returns false for {*}Timestamp{*}, preventing the conversion of *Timestamp* to *LazyTimestamp* [here|https://github.com/apache/hive/blob/53009126f6fe7ccf24cf052fd6c156542f38b19d/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java#L132]. The solution is to return {{true}} for Timestamps in the {{isPrimitive}} method. 
> ClassCastException when reading timestamps from HBase table with Avro data > -------------------------------------------------------------------------- > > Key: HIVE-26373 > URL: https://issues.apache.org/jira/browse/HIVE-26373 > Project: Hive > Issue Type: Bug > Components: Hive > Reporter: Soumyakanti Das > Assignee: Soumyakanti Das > Priority: Major > Labels: pull-request-available > Time Spent: 1h 10m > Remaining Estimate: 0h > > Consider an HBase table (e.g., HiveAvroTable) that has a column with Avro data, > with timestamps nested under complex/struct types. > {code:sql} > CREATE EXTERNAL TABLE hbase_avro_table( > `key` string COMMENT '', > `data_frv4` struct<`id`:string, `dischargedate`:struct<`value`:timestamp>>) > ROW FORMAT SERDE > 'org.apache.hadoop.hive.hbase.HBaseSerDe' > STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' > WITH SERDEPROPERTIES ( > 'serialization.format'='1', > 'hbase.columns.mapping' = ':key,data:frV4', > 'data.frV4.serialization.type'='avro', > 'data.frV4.avro.schema.url'='path/to/avro/schema/for/column/filename.avsc' > ) > TBLPROPERTIES ( > 'hbase.table.name' = 'HiveAvroTable', > 'hbase.struct.autogenerate'='true'); > {code} > Any attempt to read the timestamp value from the nested struct leads to a > {{ClassCastException}}. 
> {code:sql} > select data_frV4.dischargedate.value from hbase_avro_table; > {code} > Below you can find the stack trace for the previous query: > {noformat} > 2022-07-05T08:40:51,572 ERROR [LocalJobRunner Map Task Executor #0] > mr.ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime > Error while processing row > at > org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:573) > at > org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:148) > at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54) > at > org.apache.hadoop.hive.ql.exec.mr.ExecMapRunner.run(ExecMapRunner.java:37) > at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:465) > at org.apache.hadoop.mapred.MapTask.run(MapTask.java:349) > at > org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:271) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: java.lang.ClassCastException: > org.apache.hadoop.hive.common.type.Timestamp cannot be cast to > org.apache.hadoop.hive.serde2.lazy.LazyPrimitive > at > org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector.getPrimitiveWritableObject(AbstractPrimitiveLazyObjectInspector.java:40) > at > org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyTimestampObjectInspector.getPrimitiveWritableObject(LazyTimestampObjectInspector.java:29) > at > org.apache.hadoop.hive.serde2.lazy.LazyUtils.writePrimitiveUTF8(LazyUtils.java:308) > at > org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:292) > at > 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serializeField(LazySimpleSerDe.java:247) > at > org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.doSerialize(LazySimpleSerDe.java:231) > at > org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe.serialize(AbstractEncodingAwareSerDe.java:55) > at > org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:1059) > at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:937) > at > org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) > at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:937) > at > org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:128) > at > org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:152) > at > org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:552) > ... 11 more > {noformat} > The problem starts in the {{toLazyObject}} method of > {*}AvroLazyObjectInspector.java{*}, when > [this|https://github.com/apache/hive/blob/53009126f6fe7ccf24cf052fd6c156542f38b19d/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java#L347] > condition returns false for {*}Timestamp{*}, preventing the conversion of > *Timestamp* to *LazyTimestamp* > [here|https://github.com/apache/hive/blob/53009126f6fe7ccf24cf052fd6c156542f38b19d/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java#L132]. > The solution is to return {{true}} for Timestamps in the {{isPrimitive}} > method. -- This message was sent by Atlassian Jira (v8.20.10#820010)