[ 
https://issues.apache.org/jira/browse/HIVE-24124?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

GuangMing Lu updated HIVE-24124:
--------------------------------
    Description: 
{code:java}
create table z_tab_1(
    task_id  string,    
    data_date  string,  
    accno  string,      
    curr_type  string,  
    ifrs9_pd12_value  double,
    ifrs9_ccf_value  double,
    ifrs9_lgd_value  double
)partitioned by(pt_dt string)
STORED AS ORCFILE
TBLPROPERTIES ('bucketing_version'='1');

alter table z_tab_1 add partition(pt_dt = '2020-7-31');

insert into z_tab_1 partition(pt_dt = '2020-7-31') values
('123','2020-7-31','accno-xxxx','curr_type-xxxxx', 0.1, 0.2 ,0.3),
('1','2020-1-31','a','1-curr_type-a', 0.1, 0.2 ,0.3),
('2','2020-2-31','b','2-curr_type-b', 0.1, 0.2 ,0.3),
('3','2020-3-31','c','3-curr_type-c', 0.1, 0.2 ,0.3),
('4','2020-4-31','d','4-curr_type-d', 0.1, 0.2 ,0.3),
('5','2020-5-31','e','5-curr_type-e', 0.1, 0.2 ,0.3),
('6','2020-6-31','f','6-curr_type-f', 0.1, 0.2 ,0.3),
('7','2020-7-31','g','7-curr_type-g', 0.1, 0.2 ,0.3),
('8','2020-8-31','h','8-curr_type-h', 0.1, 0.2 ,0.3),
('9','2020-9-31','i','9-curr_type-i', 0.1, 0.2 ,0.3);
drop table if exists z_tab_2;
CREATE TABLE z_tab_2(              
    task_id  string,                
    data_date  string,              
    accno  string,                  
    curr_type  string,              
    ifrs9_pd12_value  double,       
    ifrs9_ccf_value  double,        
    ifrs9_lgd_value  double
) 
CLUSTERED BY (TASK_ID, DATA_DATE, ACCNO, CURR_TYPE)  SORTED by (TASK_ID, ACCNO, 
CURR_TYPE) INTO 2000 BUCKETS 
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORCFILE;

set hive.enforce.bucketing=true;

INSERT OVERWRITE TABLE z_tab_2
SELECT  DCCR.TASK_ID
       ,DCCR.DATA_DATE
       ,DCCR.ACCNO
       ,DCCR.CURR_TYPE
       ,DCCR.IFRS9_PD12_VALUE
       ,DCCR.IFRS9_CCF_VALUE
       ,DCCR.IFRS9_LGD_VALUE 
FROM z_tab_1 DCCR
WHERE pt_dt = '2020-7-31';
{code}
{noformat}
Caused by: java.lang.NullPointerException  
at 
org.apache.hadoop.hive.ql.exec.FileSinkOperator.findWriterOffset(FileSinkOperator.java:1072)
  
at 
org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:988)
  
at org.apache.hadoop.hive.ql.exec.Operator.baseForward(Operator.java:995)  
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:941)  
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:928)  
at 
org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95)  
at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:237)  
... 7 more{noformat}

  was:
{code:java}
create table z_tab_1(
    task_id  string,    
    data_date  string,  
    accno  string,      
    curr_type  string,  
    ifrs9_pd12_value  double,
    ifrs9_ccf_value  double,
    ifrs9_lgd_value  double
)partitioned by(pt_dt string)
STORED AS ORCFILE
TBLPROPERTIES ('bucketing_version'='1');alter table z_tab_1 add partition(pt_dt 
= '2020-7-31');
insert into z_tab_1 partition(pt_dt = '2020-7-31') 
values('123','2020-7-31','accno-xxxx','curr_type-xxxxx', 0.1, 0.2 ,0.3),
('1','2020-1-31','a','1-curr_type-a', 0.1, 0.2 ,0.3),
('2','2020-2-31','b','2-curr_type-b', 0.1, 0.2 ,0.3),
('3','2020-3-31','c','3-curr_type-c', 0.1, 0.2 ,0.3),
('4','2020-4-31','d','4-curr_type-d', 0.1, 0.2 ,0.3),
('5','2020-5-31','e','5-curr_type-e', 0.1, 0.2 ,0.3),
('6','2020-6-31','f','6-curr_type-f', 0.1, 0.2 ,0.3),
('7','2020-7-31','g','7-curr_type-g', 0.1, 0.2 ,0.3),
('8','2020-8-31','h','8-curr_type-h', 0.1, 0.2 ,0.3),
('9','2020-9-31','i','9-curr_type-i', 0.1, 0.2 ,0.3);
drop table if exists z_tab_2;
CREATE TABLE z_tab_2(              
    task_id  string,                
    data_date  string,              
    accno  string,                  
    curr_type  string,              
    ifrs9_pd12_value  double,       
    ifrs9_ccf_value  double,        
    ifrs9_lgd_value  double
) 
CLUSTERED BY (TASK_ID, DATA_DATE, ACCNO, CURR_TYPE)  SORTED by (TASK_ID, ACCNO, 
CURR_TYPE) INTO 2000 BUCKETS 
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORCFILE;

set hive.enforce.bucketing=true;

INSERT OVERWRITE TABLE z_tab_2
SELECT  DCCR.TASK_ID
       ,DCCR.DATA_DATE
       ,DCCR.ACCNO
       ,DCCR.CURR_TYPE
       ,DCCR.IFRS9_PD12_VALUE
       ,DCCR.IFRS9_CCF_VALUE
       ,DCCR.IFRS9_LGD_VALUE 
FROM z_tab_1 DCCR
WHERE pt_dt = '2020-7-31';
{code}
{noformat}
Caused by: java.lang.NullPointerException  
at 
org.apache.hadoop.hive.ql.exec.FileSinkOperator.findWriterOffset(FileSinkOperator.java:1072)
  
at 
org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:988)
  
at org.apache.hadoop.hive.ql.exec.Operator.baseForward(Operator.java:995)  
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:941)  
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:928)  
at 
org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95)  
at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:237)  
... 7 more{noformat}


> NPE occurs when bucket_version different bucket tables are joined
> -----------------------------------------------------------------
>
>                 Key: HIVE-24124
>                 URL: https://issues.apache.org/jira/browse/HIVE-24124
>             Project: Hive
>          Issue Type: Bug
>          Components: Hive
>            Reporter: GuangMing Lu
>            Priority: Major
>
> {code:java}
> create table z_tab_1(
>     task_id  string,    
>     data_date  string,  
>     accno  string,      
>     curr_type  string,  
>     ifrs9_pd12_value  double,
>     ifrs9_ccf_value  double,
>     ifrs9_lgd_value  double
> )partitioned by(pt_dt string)
> STORED AS ORCFILE
> TBLPROPERTIES ('bucketing_version'='1');
> alter table z_tab_1 add partition(pt_dt = '2020-7-31');
> insert into z_tab_1 partition(pt_dt = '2020-7-31') values
> ('123','2020-7-31','accno-xxxx','curr_type-xxxxx', 0.1, 0.2 ,0.3),
> ('1','2020-1-31','a','1-curr_type-a', 0.1, 0.2 ,0.3),
> ('2','2020-2-31','b','2-curr_type-b', 0.1, 0.2 ,0.3),
> ('3','2020-3-31','c','3-curr_type-c', 0.1, 0.2 ,0.3),
> ('4','2020-4-31','d','4-curr_type-d', 0.1, 0.2 ,0.3),
> ('5','2020-5-31','e','5-curr_type-e', 0.1, 0.2 ,0.3),
> ('6','2020-6-31','f','6-curr_type-f', 0.1, 0.2 ,0.3),
> ('7','2020-7-31','g','7-curr_type-g', 0.1, 0.2 ,0.3),
> ('8','2020-8-31','h','8-curr_type-h', 0.1, 0.2 ,0.3),
> ('9','2020-9-31','i','9-curr_type-i', 0.1, 0.2 ,0.3);
> drop table if exists z_tab_2;
> CREATE TABLE z_tab_2(              
>     task_id  string,                
>     data_date  string,              
>     accno  string,                  
>     curr_type  string,              
>     ifrs9_pd12_value  double,       
>     ifrs9_ccf_value  double,        
>     ifrs9_lgd_value  double
> ) 
> CLUSTERED BY (TASK_ID, DATA_DATE, ACCNO, CURR_TYPE)  SORTED by (TASK_ID, 
> ACCNO, CURR_TYPE) INTO 2000 BUCKETS 
> ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
> STORED AS ORCFILE;
> set hive.enforce.bucketing=true;
> INSERT OVERWRITE TABLE z_tab_2
> SELECT  DCCR.TASK_ID
>        ,DCCR.DATA_DATE
>        ,DCCR.ACCNO
>        ,DCCR.CURR_TYPE
>        ,DCCR.IFRS9_PD12_VALUE
>        ,DCCR.IFRS9_CCF_VALUE
>        ,DCCR.IFRS9_LGD_VALUE 
> FROM z_tab_1 DCCR
> WHERE pt_dt = '2020-7-31';
> {code}
> {noformat}
> Caused by: java.lang.NullPointerException  
> at 
> org.apache.hadoop.hive.ql.exec.FileSinkOperator.findWriterOffset(FileSinkOperator.java:1072)
>   
> at 
> org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:988)
>   
> at org.apache.hadoop.hive.ql.exec.Operator.baseForward(Operator.java:995)  
> at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:941)  
> at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:928)  
> at 
> org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) 
>  
> at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:237) 
>  
> ... 7 more{noformat}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to