[ 
https://issues.apache.org/jira/browse/HIVE-25671?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Nemon Lou updated HIVE-25671:
-----------------------------
    Attachment: rcfile_kryo.patch

> Hybrid Grace Hash Join NullPointer When query RCFile
> ----------------------------------------------------
>
>                 Key: HIVE-25671
>                 URL: https://issues.apache.org/jira/browse/HIVE-25671
>             Project: Hive
>          Issue Type: Bug
>    Affects Versions: 3.1.2
>            Reporter: Nemon Lou
>            Priority: Major
>         Attachments: rcfile_kryo.patch
>
>
> Environment: Hive 3.1.0, Kryo 3.0.3, Tez execution engine.
> The following SQL reproduces the issue:
> {code:sql}
> CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp3_3`( 
>    `logo` string,                                   
>    `customer_code` string,                          
>    `brand_name` string,                             
>    `business_code` string,                          
>    `discount` double,                               
>    `creation_date` string,                          
>    `etl_time` string)stored as rcfile; 
>      
> CREATE TABLE `nemon.rt_dm_lpc_customer_sum_tmp4_1`( 
>    `customer_code` string,                          
>    `etl_time` string) stored as rcfile; 
>    
> insert into nemon.rt_dm_lpc_customer_sum_tmp3_3 values 
> ("logo","customer_code","brand_name","business_code",1,"creation_date","etl_time")
>    
> ,("logo","customer_code1","brand_name","business_code",1,"creation_date","etl_time")
>    
> ,("logo","customer_code2","brand_name","business_code",1,"creation_date","etl_time")
>    
> ,("logo","customer_code3","brand_name","business_code",1,"creation_date","etl_time")
>    
> ,("logo","customer_code4","brand_name","business_code",1,"creation_date","etl_time")
>    
> ,("logo","customer_code5","brand_name","business_code",1,"creation_date","etl_time")
>    
> ,("logo","customer_code6","brand_name","business_code",1,"creation_date","etl_time")
>    
> ,("logo","customer_code7","brand_name","business_code",1,"creation_date","etl_time")
>    
> ,("logo","customer_code8","brand_name","business_code",1,"creation_date","etl_time")
>    
> ,("logo","customer_code9","brand_name","business_code",1,"creation_date","etl_time");
> insert into  nemon.rt_dm_lpc_customer_sum_tmp4_1  values 
> ("customer_code","etl_time")
>    ,("customer_code1","etl_time")
>    ,("customer_code2","etl_time")
>    ,("customer_code3","etl_time")
>    ;
> set hive.auto.convert.join.noconditionaltask.size=100000;
> set hive.mapjoin.hybridgrace.hashtable=true;
> SELECT
> tt1.logo,
> tt1.customer_code,
> tt1.brand_name,
> tt1.business_code,
> tt1.discount,
> tt1.creation_date,
> date_format(from_utc_timestamp(unix_timestamp()*1000,'Asia/Shanghai'),'yyyy-MM-dd HH:mm:ss') etl_time
> from
> (
> SELECT
> t1.logo,
> t1.customer_code,
> t1.brand_name,
> t1.business_code,
> t1.discount,
> t1.creation_date,
> row_number() over(partition by t1.customer_code,t1.logo order by 
> t1.creation_date desc) as discount_rank
> from nemon.rt_dm_lpc_customer_sum_tmp3_3 t1
> join nemon.rt_dm_lpc_customer_sum_tmp4_1 t2
> on t2.customer_code = t1.customer_code
> ) tt1
> where tt1.discount_rank = 1;
> {code}
> Error log from the Tez task:
> {noformat}
> 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Deserializing spilled hash partition...
> 2021-11-04 10:02:47,553 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Number of rows in hashmap: 1
> 2021-11-04 10:02:47,554 [INFO] [TezChild] |exec.MapJoinOperator|: Hybrid 
> Grace Hash Join: Going to process spilled big table rows in partition 5. 
> Number of rows: 1
> 2021-11-04 10:02:47,561 [ERROR] [TezChild] |exec.MapJoinOperator|: Unexpected 
> exception from MapJoinOperator : null
> java.lang.NullPointerException
>       at 
> org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase$FieldInfo.uncheckedGetField(ColumnarStructBase.java:114)
>       at 
> org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase.getField(ColumnarStructBase.java:172)
>       at 
> org.apache.hadoop.hive.serde2.objectinspector.ColumnarStructObjectInspector.getStructFieldData(ColumnarStructObjectInspector.java:67)
>       at 
> org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:95)
>       at 
> org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
>       at 
> org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
>       at 
> org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer$GetAdaptor.setFromRow(MapJoinBytesTableContainer.java:552)
>       at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.setMapJoinKey(MapJoinOperator.java:415)
>       at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.process(MapJoinOperator.java:466)
>       at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.reProcessBigTable(MapJoinOperator.java:755)
>       at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:671)
>       at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:604)
>       at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:733)
>       at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:757)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:477)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:284)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250)
>       at 
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
>       at java.security.AccessController.doPrivileged(Native Method)
>       at javax.security.auth.Subject.doAs(Subject.java:422)
>       at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
>       at 
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
>       at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
>       at 
> com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
>       at 
> com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
>       at 
> com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>       at java.lang.Thread.run(Thread.java:748)
> {noformat}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to