[ https://issues.apache.org/jira/browse/HIVE-25908?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
wu siqiang updated HIVE-25908: ------------------------------ Description: When I use window functions like last_value() with a complex type, it throws an exception during the reducer tasks. Here are some test cases: {code:sql} -- failed, throws: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap cannot be cast to java.util.Map; with tmp as ( select "a" as a, map(1, "a") as b union all select "b" as a, map(2, "b") as b union all select "c" as a, map(3, "c") as b union all select "d" as a, map(4, "d") as b ) select a, last_value(b) over (partition by 1 order by a rows unbounded preceding ) from tmp; -- failed, throws: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to org.apache.hadoop.io.Text; with tmp as ( select "a" as a, named_struct("c1", "a") as b union all select "b" as a, named_struct("c1", "b") as b union all select "c" as a, named_struct("c1", "c") as b union all select "d" as a, named_struct("c1", "d") as b ) select a, last_value(b) over (partition by 1 order by a rows unbounded preceding ) from tmp; -- failed, throws: org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray cannot be cast to [Ljava.lang.Object; with tmp as ( select "a" as a, array("c1", "a") as b union all select "b" as a, array("c1", "b") as b union all select "c" as a, array("c1", "c") as b union all select "d" as a, array("c1", "d") as b ) select a, last_value(b) over (partition by 1 order by a rows unbounded preceding ) from tmp; -- succeeded! 
with tmp as ( select "a" as a, `map`("c1", "a") as b union all select "b" as a, `map`("c1", "b") as b union all select "c" as a, `map`("c1", "c") as b union all select "d" as a, `map`("c1", "d") as b ) select a, collect_list(b) over (partition by 1 order by a rows unbounded preceding ) from tmp; -- failed, throws: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap cannot be cast to java.util.Map; with tmp as ( select "a" as a, `map`("c1", "a") as b union all select "b" as a, `map`("c1", "b") as b union all select "c" as a, `map`("c1", "c") as b union all select "d" as a, `map`("c1", "d") as b ) select a, first_value(b) over (partition by 1 order by a rows unbounded preceding ) from tmp; -- succeeded! with tmp as ( select "a" as a, "a" as b union all select "b" as a, "b" as b union all select "c" as a, "c" as b union all select "d" as a, "d" as b ) select a, first_value(b) over (partition by 1 order by a rows unbounded preceding ) from tmp;{code} The following is one of the exception stack traces; the others are similar: {code:java} 2022-01-28 12:49:45,387 ERROR [main] ExecReducer: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row (tag=0) {"key":{"reducesinkkey0":1,"reducesinkkey1":"a"},"value":{"_col0":{"c1":"a"}}} at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:229) at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:445) at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:393) at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:174) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875) at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:168) Caused by: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to org.apache.hadoop.io.Text at 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.copyObject(WritableStringObjectInspector.java:36) at org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:418) at org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:453) at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLastValue$GenericUDAFLastValueEvaluator.iterate(GenericUDAFLastValue.java:117) at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLastValue$LastValStreamingFixedWindow.iterate(GenericUDAFLastValue.java:209) at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.aggregate(GenericUDAFEvaluator.java:196) at org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.processRow(WindowingTableFunction.java:412) at org.apache.hadoop.hive.ql.exec.PTFOperator$PTFInvocation.processRow(PTFOperator.java:325) at org.apache.hadoop.hive.ql.exec.PTFOperator.process(PTFOperator.java:138) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:882) at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:220) ... 
7 more 2022-01-28 12:49:45,388 WARN [main] org.apache.hadoop.hive.ql.exec.SelectOperator: Caught exception while closing operator: Internal Error: cannot generate all output rows for a Partition org.apache.hadoop.hive.ql.metadata.HiveException: Internal Error: cannot generate all output rows for a Partition at org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.finishPartition(WindowingTableFunction.java:519) at org.apache.hadoop.hive.ql.exec.PTFOperator$PTFInvocation.finishPartition(PTFOperator.java:345) at org.apache.hadoop.hive.ql.exec.PTFOperator.closeOp(PTFOperator.java:103) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:686) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:700) at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.close(ExecReducer.java:263) at org.apache.hadoop.io.IOUtils.cleanupWithLogger(IOUtils.java:278) at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:460) at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:393) at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:174) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875) at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:168) 2022-01-28 12:49:45,391 WARN [main] org.apache.hadoop.mapred.YarnChild: Exception running child : java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row (tag=0) {"key":{"reducesinkkey0":1,"reducesinkkey1":"a"},"value":{"_col0":{"c1":"a"}}} at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:241) at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:445) at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:393) at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:174) at java.security.AccessController.doPrivileged(Native Method) at 
javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875) at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:168) Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row (tag=0) {"key":{"reducesinkkey0":1,"reducesinkkey1":"a"},"value":{"_col0":{"c1":"a"}}} at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:229) ... 7 more Caused by: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to org.apache.hadoop.io.Text at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.copyObject(WritableStringObjectInspector.java:36) at org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:418) at org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:453) at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLastValue$GenericUDAFLastValueEvaluator.iterate(GenericUDAFLastValue.java:117) at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLastValue$LastValStreamingFixedWindow.iterate(GenericUDAFLastValue.java:209) at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.aggregate(GenericUDAFEvaluator.java:196) at org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.processRow(WindowingTableFunction.java:412) at org.apache.hadoop.hive.ql.exec.PTFOperator$PTFInvocation.processRow(PTFOperator.java:325) at org.apache.hadoop.hive.ql.exec.PTFOperator.process(PTFOperator.java:138) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:882) at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:220) ... 
7 more {code} was: when i use window functions like last_value() with complex type, it throws an execption during doing reducer tasks. here are some test cases: {code:sql} with tmp as ( select "a" as a, map(1, "a") as b union all select "b" as a, map(2, "b") as b union all select "c" as a, map(3, "c") as b union all select "d" as a, map(4, "d") as b ) select a, last_value(b) over (partition by 1 order by a rows unbounded preceding ) from tmp; -- ERROR: with tmp as ( select "a" as a, named_struct("c1", "a") as b union all select "b" as a, named_struct("c1", "b") as b union all select "c" as a, named_struct("c1", "c") as b union all select "d" as a, named_struct("c1", "d") as b ) select a, last_value(b) over (partition by 1 order by a rows unbounded preceding ) from tmp; -- ERROR: with tmp as ( select "a" as a, array("c1", "a") as b union all select "b" as a, array("c1", "b") as b union all select "c" as a, array("c1", "c") as b union all select "d" as a, array("c1", "d") as b ) select a, last_value(b) over (partition by 1 order by a rows unbounded preceding ) from tmp; -- OK with tmp as ( select "a" as a, `map`("c1", "a") as b union all select "b" as a, `map`("c1", "b") as b union all select "c" as a, `map`("c1", "c") as b union all select "d" as a, `map`("c1", "d") as b ) select a, collect_list(b) over (partition by 1 order by a rows unbounded preceding ) from tmp; -- ERROR with tmp as ( select "a" as a, `map`("c1", "a") as b union all select "b" as a, `map`("c1", "b") as b union all select "c" as a, `map`("c1", "c") as b union all select "d" as a, `map`("c1", "d") as b ) select a, first_value(b) over (partition by 1 order by a rows unbounded preceding ) from tmp; -- with tmp as ( select "a" as a, "a" as b union all select "b" as a, "b" as b union all select "c" as a, "c" as b union all select "d" as a, "d" as b ) select a, first_value(b) over (partition by 1 order by a rows unbounded preceding ) from tmp;{code} > Hive2.1.1 throws exception when use window function 
with complex type: > java.lang.ClassCastException: > org.apache.hadoop.hive.serde2.lazybinary.LazyBinary* > ---------------------------------------------------------------------------------------------------------------------------------------------------------- > > Key: HIVE-25908 > URL: https://issues.apache.org/jira/browse/HIVE-25908 > Project: Hive > Issue Type: Bug > Components: Query Processor > Affects Versions: 2.1.1 > Environment: Hive 2.1.1-cdh6.3.2 > Reporter: wu siqiang > Priority: Major > > when i use window functions like last_value() with complex type, it throws > an execption during doing reducer tasks. > here are some test cases: > {code:sql} > -- failed, throws: java.lang.ClassCastException: > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap cannot be cast to > java.util.Map; > with tmp as ( > select "a" as a, map(1, "a") as b > union all > select "b" as a, map(2, "b") as b > union all > select "c" as a, map(3, "c") as b > union all > select "d" as a, map(4, "d") as b > ) > select a, last_value(b) over (partition by 1 order by a rows unbounded > preceding ) > from tmp; > -- failed, throws: java.lang.ClassCastException: > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to > org.apache.hadoop.io.Text; > with tmp as ( > select "a" as a, named_struct("c1", "a") as b > union all > select "b" as a, named_struct("c1", "b") as b > union all > select "c" as a, named_struct("c1", "c") as b > union all > select "d" as a, named_struct("c1", "d") as b > ) > select a, last_value(b) over (partition by 1 order by a rows unbounded > preceding ) > from tmp; > -- failed, throws: org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray > cannot be cast to [Ljava.lang.Object; > with tmp as ( > select "a" as a, array("c1", "a") as b > union all > select "b" as a, array("c1", "b") as b > union all > select "c" as a, array("c1", "c") as b > union all > select "d" as a, array("c1", "d") as b > ) > select a, last_value(b) over (partition by 1 
order by a rows unbounded > preceding ) > from tmp; > -- succeeded! > with tmp as ( > select "a" as a, `map`("c1", "a") as b > union all > select "b" as a, `map`("c1", "b") as b > union all > select "c" as a, `map`("c1", "c") as b > union all > select "d" as a, `map`("c1", "d") as b > ) > select a, collect_list(b) over (partition by 1 order by a rows unbounded > preceding ) > from tmp; > -- failed, throws: java.lang.ClassCastException: > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap cannot be cast to > java.util.Map; > with tmp as ( > select "a" as a, `map`("c1", "a") as b > union all > select "b" as a, `map`("c1", "b") as b > union all > select "c" as a, `map`("c1", "c") as b > union all > select "d" as a, `map`("c1", "d") as b > ) > select a, first_value(b) over (partition by 1 order by a rows unbounded > preceding ) > from tmp; > -- succeeded! > with tmp as ( > select "a" as a, "a" as b > union all > select "b" as a, "b" as b > union all > select "c" as a, "c" as b > union all > select "d" as a, "d" as b > ) > select a, first_value(b) over (partition by 1 order by a rows unbounded > preceding ) > from tmp;{code} > The following is one of the exception stack traces; the others are similar: > {code:java} > 2022-01-28 12:49:45,387 ERROR [main] ExecReducer: > org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while > processing row (tag=0) > {"key":{"reducesinkkey0":1,"reducesinkkey1":"a"},"value":{"_col0":{"c1":"a"}}} > at > org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:229) > at > org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:445) > at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:393) > at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:174) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875) > at 
org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:168) > Caused by: java.lang.ClassCastException: > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to > org.apache.hadoop.io.Text > at > org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.copyObject(WritableStringObjectInspector.java:36) > at > org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:418) > at > org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:453) > at > org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLastValue$GenericUDAFLastValueEvaluator.iterate(GenericUDAFLastValue.java:117) > at > org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLastValue$LastValStreamingFixedWindow.iterate(GenericUDAFLastValue.java:209) > at > org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.aggregate(GenericUDAFEvaluator.java:196) > at > org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.processRow(WindowingTableFunction.java:412) > at > org.apache.hadoop.hive.ql.exec.PTFOperator$PTFInvocation.processRow(PTFOperator.java:325) > at > org.apache.hadoop.hive.ql.exec.PTFOperator.process(PTFOperator.java:138) > at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:882) > at > org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) > at > org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:220) > ... 
7 more > 2022-01-28 12:49:45,388 WARN [main] > org.apache.hadoop.hive.ql.exec.SelectOperator: Caught exception while closing > operator: Internal Error: cannot generate all output rows for a Partition > org.apache.hadoop.hive.ql.metadata.HiveException: Internal Error: cannot > generate all output rows for a Partition > at > org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.finishPartition(WindowingTableFunction.java:519) > at > org.apache.hadoop.hive.ql.exec.PTFOperator$PTFInvocation.finishPartition(PTFOperator.java:345) > at > org.apache.hadoop.hive.ql.exec.PTFOperator.closeOp(PTFOperator.java:103) > at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:686) > at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:700) > at > org.apache.hadoop.hive.ql.exec.mr.ExecReducer.close(ExecReducer.java:263) > at org.apache.hadoop.io.IOUtils.cleanupWithLogger(IOUtils.java:278) > at > org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:460) > at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:393) > at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:174) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875) > at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:168) > 2022-01-28 12:49:45,391 WARN [main] org.apache.hadoop.mapred.YarnChild: > Exception running child : java.lang.RuntimeException: > org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while > processing row (tag=0) > {"key":{"reducesinkkey0":1,"reducesinkkey1":"a"},"value":{"_col0":{"c1":"a"}}} > at > org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:241) > at > org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:445) > at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:393) > at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:174) > 
at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875) > at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:168) > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime > Error while processing row (tag=0) > {"key":{"reducesinkkey0":1,"reducesinkkey1":"a"},"value":{"_col0":{"c1":"a"}}} > at > org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:229) > ... 7 more > Caused by: java.lang.ClassCastException: > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct cannot be cast to > org.apache.hadoop.io.Text > at > org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.copyObject(WritableStringObjectInspector.java:36) > at > org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:418) > at > org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.copyToStandardObject(ObjectInspectorUtils.java:453) > at > org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLastValue$GenericUDAFLastValueEvaluator.iterate(GenericUDAFLastValue.java:117) > at > org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLastValue$LastValStreamingFixedWindow.iterate(GenericUDAFLastValue.java:209) > at > org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.aggregate(GenericUDAFEvaluator.java:196) > at > org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.processRow(WindowingTableFunction.java:412) > at > org.apache.hadoop.hive.ql.exec.PTFOperator$PTFInvocation.processRow(PTFOperator.java:325) > at > org.apache.hadoop.hive.ql.exec.PTFOperator.process(PTFOperator.java:138) > at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:882) > at > org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) > at > 
org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:220) > ... 7 more {code} > -- This message was sent by Atlassian Jira (v8.20.1#820001)