[hive] 01/02: HIVE-20593 : Load Data for partitioned ACID tables fails with bucketId out of range: -1 (Deepak Jaiswal, reviewed by Eugene Koifman)

sankarh Mon, 15 Apr 2019 23:50:21 -0700

This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hive.git


commit 83e39aeac870105a62decd97b48ea7b63c7a9043
Author: Deepak Jaiswal <djais...@apache.org>
AuthorDate: Tue Sep 25 23:26:17 2018 -0700

    HIVE-20593 : Load Data for partitioned ACID tables fails with bucketId out of range: -1 (Deepak Jaiswal, reviewed by Eugene Koifman)
---
 ...230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 | Bin 0 -> 501 bytes
 ...230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0 | Bin 0 -> 465 bytes
 .../hadoop/hive/ql/parse/LoadSemanticAnalyzer.java |   7 +-
 .../queries/clientpositive/load_data_using_job.q   |  18 +++-
 .../clientpositive/llap/load_data_using_job.q.out  | 110 ++++++++++++++++++++-
 5 files changed, 128 insertions(+), 7 deletions(-)

diff --git a/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0
new file mode 100644
index 0000000..020bdcc
Binary files /dev/null and b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 differ
diff --git a/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0 b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0
new file mode 100644
index 0000000..8c2604d
Binary files /dev/null and b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0 differ
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index cbacd05..ee12f64 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -26,11 +26,13 @@ import java.io.Serializable;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.ArrayList;
-import java.util.HashSet;
+
 
 import org.antlr.runtime.tree.Tree;
 import org.apache.commons.lang.StringUtils;
@@ -474,6 +476,9 @@ public class LoadSemanticAnalyzer extends SemanticAnalyzer {
     // wipe out partition columns
     tempTableObj.setPartCols(new ArrayList<>());
 
+    // Reset table params
+    tempTableObj.setParameters(new HashMap<>());
+
     // Set data location and input format, it must be text
     tempTableObj.setDataLocation(new Path(fromURI));
     if (inputFormatClassName != null && serDeClassName != null) {
diff --git a/ql/src/test/queries/clientpositive/load_data_using_job.q b/ql/src/test/queries/clientpositive/load_data_using_job.q
index b760d9b..970a752 100644
--- a/ql/src/test/queries/clientpositive/load_data_using_job.q
+++ b/ql/src/test/queries/clientpositive/load_data_using_job.q
@@ -91,4 +91,20 @@ load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt
 INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
 SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
 select * from srcbucket_mapjoin_n8;
-drop table srcbucket_mapjoin_n8;
\ No newline at end of file
+drop table srcbucket_mapjoin_n8;
+
+-- Load into ACID table using ORC files
+set hive.mapred.mode=nonstrict;
+set hive.optimize.ppd=true;
+set hive.optimize.index.filter=true;
+set hive.tez.bucket.pruning=true;
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true');
+explain load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn;
+load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn;
+
+select * from orc_test_txn;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out b/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out
index 765ffdf..8a82467 100644
--- a/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out
+++ b/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out
@@ -977,16 +977,16 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcbucket_mapjoin_n8__temp_table_for_load_data__
-                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE 
Column stats: NONE
+                  Statistics: Num rows: 47 Data size: 8648 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: int), value (type: string)
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 47 Data size: 8648 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: int)
                       sort order: +
                       Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 47 Data size: 8648 Basic stats: 
COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -996,10 +996,10 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 
(type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE 
Column stats: NONE
+                  Statistics: Num rows: 47 Data size: 8648 Basic stats: 
COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -3018,3 +3018,103 @@ POSTHOOK: query: drop table srcbucket_mapjoin_n8
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@srcbucket_mapjoin_n8
 POSTHOOK: Output: default@srcbucket_mapjoin_n8
+PREHOOK: query: CREATE TABLE orc_test_txn (`id` integer, name string, dept 
string) PARTITIONED BY (year integer) STORED AS ORC 
TBLPROPERTIES('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_test_txn
+POSTHOOK: query: CREATE TABLE orc_test_txn (`id` integer, name string, dept 
string) PARTITIONED BY (year integer) STORED AS ORC 
TBLPROPERTIES('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_test_txn
+#### A masked pattern was here ####
+PREHOOK: type: QUERY
+#### A masked pattern was here ####
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orc_test_txn__temp_table_for_load_data__
+                  Statistics: Num rows: 24 Data size: 9024 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: id (type: int), name (type: string), dept 
(type: string), year (type: int)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 24 Data size: 9024 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 24 Data size: 9024 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                          name: default.orc_test_txn
+                      Write Type: INSERT
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            year 
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.orc_test_txn
+          Write Type: INSERT
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+
+#### A masked pattern was here ####
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_test_txn__temp_table_for_load_data__
+PREHOOK: Output: default@orc_test_txn
+#### A masked pattern was here ####
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_test_txn__temp_table_for_load_data__
+POSTHOOK: Output: default@orc_test_txn@year=2016
+POSTHOOK: Output: default@orc_test_txn@year=2017
+POSTHOOK: Output: default@orc_test_txn@year=2018
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).dept SIMPLE 
[(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept,
 type:string, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).id SIMPLE 
[(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id,
 type:int, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).name SIMPLE 
[(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name,
 type:string, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).dept SIMPLE 
[(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept,
 type:string, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).id SIMPLE 
[(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id,
 type:int, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).name SIMPLE 
[(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name,
 type:string, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).dept SIMPLE 
[(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept,
 type:string, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).id SIMPLE 
[(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id,
 type:int, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).name SIMPLE 
[(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name,
 type:string, comment:null), ]
+PREHOOK: query: select * from orc_test_txn
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_test_txn
+PREHOOK: Input: default@orc_test_txn@year=2016
+PREHOOK: Input: default@orc_test_txn@year=2017
+PREHOOK: Input: default@orc_test_txn@year=2018
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_test_txn
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_test_txn
+POSTHOOK: Input: default@orc_test_txn@year=2016
+POSTHOOK: Input: default@orc_test_txn@year=2017
+POSTHOOK: Input: default@orc_test_txn@year=2018
+#### A masked pattern was here ####
+9      Harris  CSE     2017
+8      Henry   CSE     2016
+10     Haley   CSE     2018

[hive] 01/02: HIVE-20593 : Load Data for partitioned ACID tables fails with bucketId out of range: -1 (Deepak Jaiswal, reviewed by Eugene Koifman)

Reply via email to