[ https://issues.apache.org/jira/browse/HIVE-24124?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
GuangMing Lu updated HIVE-24124: -------------------------------- Description: {code:java} create table z_tab_1( task_id string, data_date string, accno string, curr_type string, ifrs9_pd12_value double, ifrs9_ccf_value double, ifrs9_lgd_value double )partitioned by(pt_dt string) STORED AS ORCFILE TBLPROPERTIES ('bucketing_version'='1'); alter table z_tab_1 add partition(pt_dt = '2020-7-31'); insert into z_tab_1 partition(pt_dt = '2020-7-31') values ('123','2020-7-31','accno-xxxx','curr_type-xxxxx', 0.1, 0.2 ,0.3), ('1','2020-1-31','a','1-curr_type-a', 0.1, 0.2 ,0.3), ('2','2020-2-31','b','2-curr_type-b', 0.1, 0.2 ,0.3), ('3','2020-3-31','c','3-curr_type-c', 0.1, 0.2 ,0.3), ('4','2020-4-31','d','4-curr_type-d', 0.1, 0.2 ,0.3), ('5','2020-5-31','e','5-curr_type-e', 0.1, 0.2 ,0.3), ('6','2020-6-31','f','6-curr_type-f', 0.1, 0.2 ,0.3), ('7','2020-7-31','g','7-curr_type-g', 0.1, 0.2 ,0.3), ('8','2020-8-31','h','8-curr_type-h', 0.1, 0.2 ,0.3), ('9','2020-9-31','i','9-curr_type-i', 0.1, 0.2 ,0.3); drop table if exists z_tab_2; CREATE TABLE z_tab_2( task_id string, data_date string, accno string, curr_type string, ifrs9_pd12_value double, ifrs9_ccf_value double, ifrs9_lgd_value double ) CLUSTERED BY (TASK_ID, DATA_DATE, ACCNO, CURR_TYPE) SORTED by (TASK_ID, ACCNO, CURR_TYPE) INTO 2000 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS ORCFILE; set hive.enforce.bucketing=true; INSERT OVERWRITE TABLE z_tab_2 SELECT DCCR.TASK_ID ,DCCR.DATA_DATE ,DCCR.ACCNO ,DCCR.CURR_TYPE ,DCCR.IFRS9_PD12_VALUE ,DCCR.IFRS9_CCF_VALUE ,DCCR.IFRS9_LGD_VALUE FROM z_tab_1 DCCR WHERE pt_dt = '2020-7-31'; {code} {noformat} Caused by: java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.FileSinkOperator.findWriterOffset(FileSinkOperator.java:1072) at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:988) at org.apache.hadoop.hive.ql.exec.Operator.baseForward(Operator.java:995) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:941) 
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:928) at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:237) ... 7 more{noformat} was: {code:java} create table z_tab_1( task_id string, data_date string, accno string, curr_type string, ifrs9_pd12_value double, ifrs9_ccf_value double, ifrs9_lgd_value double )partitioned by(pt_dt string) STORED AS ORCFILE TBLPROPERTIES ('bucketing_version'='1');alter table z_tab_1 add partition(pt_dt = '2020-7-31'); insert into z_tab_1 partition(pt_dt = '2020-7-31') values('123','2020-7-31','accno-xxxx','curr_type-xxxxx', 0.1, 0.2 ,0.3), ('1','2020-1-31','a','1-curr_type-a', 0.1, 0.2 ,0.3), ('2','2020-2-31','b','2-curr_type-b', 0.1, 0.2 ,0.3), ('3','2020-3-31','c','3-curr_type-c', 0.1, 0.2 ,0.3), ('4','2020-4-31','d','4-curr_type-d', 0.1, 0.2 ,0.3), ('5','2020-5-31','e','5-curr_type-e', 0.1, 0.2 ,0.3), ('6','2020-6-31','f','6-curr_type-f', 0.1, 0.2 ,0.3), ('7','2020-7-31','g','7-curr_type-g', 0.1, 0.2 ,0.3), ('8','2020-8-31','h','8-curr_type-h', 0.1, 0.2 ,0.3), ('9','2020-9-31','i','9-curr_type-i', 0.1, 0.2 ,0.3); drop table if exists z_tab_2; CREATE TABLE z_tab_2( task_id string, data_date string, accno string, curr_type string, ifrs9_pd12_value double, ifrs9_ccf_value double, ifrs9_lgd_value double ) CLUSTERED BY (TASK_ID, DATA_DATE, ACCNO, CURR_TYPE) SORTED by (TASK_ID, ACCNO, CURR_TYPE) INTO 2000 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS ORCFILE; set hive.enforce.bucketing=true; INSERT OVERWRITE TABLE z_tab_2 SELECT DCCR.TASK_ID ,DCCR.DATA_DATE ,DCCR.ACCNO ,DCCR.CURR_TYPE ,DCCR.IFRS9_PD12_VALUE ,DCCR.IFRS9_CCF_VALUE ,DCCR.IFRS9_LGD_VALUE FROM z_tab_1 DCCR WHERE pt_dt = '2020-7-31'; {code} {noformat} Caused by: java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.FileSinkOperator.findWriterOffset(FileSinkOperator.java:1072) at 
org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:988) at org.apache.hadoop.hive.ql.exec.Operator.baseForward(Operator.java:995) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:941) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:928) at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:237) ... 7 more{noformat} > NPE occurs when bucket tables with different bucketing_version are joined > ------------------------------------------------------------------------- > > Key: HIVE-24124 > URL: https://issues.apache.org/jira/browse/HIVE-24124 > Project: Hive > Issue Type: Bug > Components: Hive > Reporter: GuangMing Lu > Priority: Major > > {code:java} > create table z_tab_1( > task_id string, > data_date string, > accno string, > curr_type string, > ifrs9_pd12_value double, > ifrs9_ccf_value double, > ifrs9_lgd_value double > )partitioned by(pt_dt string) > STORED AS ORCFILE > TBLPROPERTIES ('bucketing_version'='1'); > alter table z_tab_1 add partition(pt_dt = '2020-7-31'); > insert into z_tab_1 partition(pt_dt = '2020-7-31') values > ('123','2020-7-31','accno-xxxx','curr_type-xxxxx', 0.1, 0.2 ,0.3), > ('1','2020-1-31','a','1-curr_type-a', 0.1, 0.2 ,0.3), > ('2','2020-2-31','b','2-curr_type-b', 0.1, 0.2 ,0.3), > ('3','2020-3-31','c','3-curr_type-c', 0.1, 0.2 ,0.3), > ('4','2020-4-31','d','4-curr_type-d', 0.1, 0.2 ,0.3), > ('5','2020-5-31','e','5-curr_type-e', 0.1, 0.2 ,0.3), > ('6','2020-6-31','f','6-curr_type-f', 0.1, 0.2 ,0.3), > ('7','2020-7-31','g','7-curr_type-g', 0.1, 0.2 ,0.3), > ('8','2020-8-31','h','8-curr_type-h', 0.1, 0.2 ,0.3), > ('9','2020-9-31','i','9-curr_type-i', 0.1, 0.2 ,0.3); > drop table if exists z_tab_2; > CREATE TABLE z_tab_2( > task_id string, > data_date string, > accno string, > curr_type string, > ifrs9_pd12_value double, > ifrs9_ccf_value double, > ifrs9_lgd_value double > ) > CLUSTERED BY (TASK_ID, 
DATA_DATE, ACCNO, CURR_TYPE) SORTED by (TASK_ID, > ACCNO, CURR_TYPE) INTO 2000 BUCKETS > ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' > STORED AS ORCFILE; > set hive.enforce.bucketing=true; > INSERT OVERWRITE TABLE z_tab_2 > SELECT DCCR.TASK_ID > ,DCCR.DATA_DATE > ,DCCR.ACCNO > ,DCCR.CURR_TYPE > ,DCCR.IFRS9_PD12_VALUE > ,DCCR.IFRS9_CCF_VALUE > ,DCCR.IFRS9_LGD_VALUE > FROM z_tab_1 DCCR > WHERE pt_dt = '2020-7-31'; > {code} > {noformat} > Caused by: java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.exec.FileSinkOperator.findWriterOffset(FileSinkOperator.java:1072) > > at > org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:988) > > at org.apache.hadoop.hive.ql.exec.Operator.baseForward(Operator.java:995) > at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:941) > at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:928) > at > org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) > > at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:237) > > ... 7 more{noformat} -- This message was sent by Atlassian Jira (v8.3.4#803005)