[ https://issues.apache.org/jira/browse/HIVE-10867?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Alina Abramova updated HIVE-10867: ---------------------------------- Affects Version/s: 1.0.0 Status: Patch Available (was: In Progress) > ArrayIndexOutOfBoundsException LazyBinaryUtils.byteArrayToLong with Hive on > Tez > ------------------------------------------------------------------------------- > > Key: HIVE-10867 > URL: https://issues.apache.org/jira/browse/HIVE-10867 > Project: Hive > Issue Type: Bug > Components: Hive, Tez > Affects Versions: 1.0.0, 0.14.0 > Environment: Hortonworks distribution 2.2.4-2 > Hive 0.14.0 > Tez 0.5.2.2.2.4.2-2 on cluster > Tez 0.7.0 in local setup > Reporter: Per Ullberg > Assignee: Alina Abramova > Attachments: HIVE-10867.patch > > > Hi, > The following query runs fine on map reduce engine but when setting the > hive.execution.engine to tez it produces an ArrayIndexOutOfBoundsException. > Query > {code} > create external table table_1 (id string, date string, amount bigint); > insert into table table_1 values (305,'2013-03-02',3790); > create external table table_2 (id string); > insert into table table_2 VALUES (305); > create external table table_3 (id string, date_3 string, amount_3 bigint); > insert into table table_3 values (305,'2013-03-01',-1600); > create external table table_4 (id bigint, str_4 string, amount_4 bigint); > create table table_5 > as > SELECT > c.diff > FROM ( > SELECT > id AS id, > date AS create_date, > -amount AS diff > FROM table_1 > UNION ALL > SELECT > p.id AS id, > p.str_4 AS create_date, > -p.amount_4 AS diff > FROM table_4 p > UNION ALL > SELECT > id, > create_date, > diff > FROM ( > SELECT > i.id AS id, > tp.date_3 AS create_date, > cast(amount_3 as double) AS diff > FROM table_3 tp > INNER JOIN table_2 i ON cast(tp.id as string) = cast(i.id as string) > ) fees > ) c > INNER JOIN table_2 i ON cast(c.id as string) = cast(i.id as string); > {code} > Results with map reduce engine: > {code} > hive> select * from table_5; > OK > -1600.0 > -3790.0 > Time taken: 0.061 seconds, Fetched: 2 row(s) > 
{code} > Exception with tez engine: > {code} > Status: Failed > Vertex failed, vertexName=Reducer 4, vertexId=vertex_1432809678493_0891_4_06, > diagnostics=[Task failed, taskId=task_1432809678493_0891_4_06_000000, > diagnostics=[TaskAttempt 0 failed, info=[Error: Failure while running > task:java.lang.RuntimeException: > org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while > processing row (tag=0) > {"key":{"reducesinkkey0":"305"},"value":{"_col1":-1600.0}} > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:186) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:138) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:324) > at > org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:176) > at > org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:168) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628) > at > org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:168) > at > org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:163) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime > Error while processing row (tag=0) > {"key":{"reducesinkkey0":"305"},"value":{"_col1":-1600.0}} > at > org.apache.hadoop.hive.ql.exec.tez.ReduceRecordSource$GroupIterator.next(ReduceRecordSource.java:337) > at > 
org.apache.hadoop.hive.ql.exec.tez.ReduceRecordSource.pushRecord(ReduceRecordSource.java:218) > at > org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.run(ReduceRecordProcessor.java:168) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:163) > ... 13 more > Caused by: java.lang.ArrayIndexOutOfBoundsException: 6 > at > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.byteArrayToLong(LazyBinaryUtils.java:84) > at > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryDouble.init(LazyBinaryDouble.java:43) > at > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.uncheckedGetField(LazyBinaryStruct.java:264) > at > org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.getField(LazyBinaryStruct.java:201) > at > org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector.getStructFieldData(LazyBinaryStructObjectInspector.java:64) > at > org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:98) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:77) > at > org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:65) > at > org.apache.hadoop.hive.ql.exec.JoinUtil.computeValues(JoinUtil.java:193) > at > org.apache.hadoop.hive.ql.exec.CommonJoinOperator.getFilteredValue(CommonJoinOperator.java:408) > at > org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator.processOp(CommonMergeJoinOperator.java:162) > at > org.apache.hadoop.hive.ql.exec.tez.ReduceRecordSource$GroupIterator.next(ReduceRecordSource.java:328) > ... 16 more > {code} > Secondly, adding a column to table_5 gets rid of the Exception, but instead > the result set is corrupted when using the tez engine. This is even more > scary! 
> Query > {code} > create table table_5 > as > SELECT > c.create_date, > c.diff > FROM ( > SELECT > id AS id, > date AS create_date, > -amount AS diff > FROM table_1 > UNION ALL > SELECT > p.id AS id, > p.str_4 AS create_date, > -p.amount_4 AS diff > FROM table_4 p > UNION ALL > SELECT > id, > create_date, > diff > FROM ( > SELECT > i.id AS id, > tp.date_3 AS create_date, > cast(amount_3 as double) AS diff > FROM table_3 tp > INNER JOIN table_2 i ON cast(tp.id as string) = cast(i.id as string) > ) fees > ) c > INNER JOIN table_2 i ON cast(c.id as string) = cast(i.id as string); > {code} > Result: > {code} > hive> select * from with_mr.table_5; > OK > 2013-03-02 -3790.0 > 2013-03-01 -1600.0 > Time taken: 8.107 seconds, Fetched: 2 row(s) > hive> select * from with_tez.table_5; > OK > 2013-03-01 -1600.0 > 2013-03-02 -1.6968199793927886E-279 > Time taken: 0.047 seconds, Fetched: 2 row(s) > {code} > This ticket could possibly be related to > https://issues.apache.org/jira/browse/HIVE-9517? -- This message was sent by Atlassian JIRA (v6.3.4#6332)