This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/hive.git
commit 83e39aeac870105a62decd97b48ea7b63c7a9043 Author: Deepak Jaiswal <djais...@apache.org> AuthorDate: Tue Sep 25 23:26:17 2018 -0700 HIVE-20593 : Load Data for partitioned ACID tables fails with bucketId out of range: -1 (Deepak Jaiswal, reviewed by Eugene Koifman) --- ...230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 | Bin 0 -> 501 bytes ...230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0 | Bin 0 -> 465 bytes .../hadoop/hive/ql/parse/LoadSemanticAnalyzer.java | 7 +- .../queries/clientpositive/load_data_using_job.q | 18 +++- .../clientpositive/llap/load_data_using_job.q.out | 110 ++++++++++++++++++++- 5 files changed, 128 insertions(+), 7 deletions(-) diff --git a/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 new file mode 100644 index 0000000..020bdcc Binary files /dev/null and b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 differ diff --git a/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0 b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0 new file mode 100644 index 0000000..8c2604d Binary files /dev/null and b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0 differ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index cbacd05..ee12f64 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -26,11 +26,13 @@ import java.io.Serializable; import java.net.URI; import java.net.URISyntaxException; import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.ArrayList; -import java.util.HashSet; + import org.antlr.runtime.tree.Tree; import org.apache.commons.lang.StringUtils; @@ -474,6 +476,9 @@ public class LoadSemanticAnalyzer extends SemanticAnalyzer { // wipe out partition columns tempTableObj.setPartCols(new ArrayList<>()); + // Reset table params + tempTableObj.setParameters(new HashMap<>()); + // Set data location and input format, it must be text tempTableObj.setDataLocation(new Path(fromURI)); if (inputFormatClassName != null && serDeClassName != null) { diff --git a/ql/src/test/queries/clientpositive/load_data_using_job.q b/ql/src/test/queries/clientpositive/load_data_using_job.q index b760d9b..970a752 100644 --- a/ql/src/test/queries/clientpositive/load_data_using_job.q +++ b/ql/src/test/queries/clientpositive/load_data_using_job.q @@ -91,4 +91,20 @@ load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'; select * from srcbucket_mapjoin_n8; -drop table srcbucket_mapjoin_n8; \ No newline at end of file +drop table srcbucket_mapjoin_n8; + +-- Load into ACID table using ORC files +set hive.mapred.mode=nonstrict; +set hive.optimize.ppd=true; +set hive.optimize.index.filter=true; +set hive.tez.bucket.pruning=true; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true'); +explain load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn; +load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn; + +select * from orc_test_txn; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out b/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out index 765ffdf..8a82467 100644 --- a/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out +++ b/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out @@ -977,16 +977,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcbucket_mapjoin_n8__temp_table_for_load_data__ - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -996,10 +996,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3018,3 +3018,103 @@ POSTHOOK: query: drop table srcbucket_mapjoin_n8 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@srcbucket_mapjoin_n8 POSTHOOK: Output: default@srcbucket_mapjoin_n8 +PREHOOK: query: CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_test_txn +POSTHOOK: query: CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_test_txn +#### A masked pattern was here #### +PREHOOK: type: QUERY +#### A masked pattern was here #### +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_test_txn__temp_table_for_load_data__ + Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), name (type: string), dept (type: string), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_test_txn + Write Type: INSERT + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + year + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_test_txn + Write Type: INSERT + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +#### A masked pattern was here #### +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_test_txn__temp_table_for_load_data__ +PREHOOK: Output: default@orc_test_txn +#### A masked pattern was here #### +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_test_txn__temp_table_for_load_data__ +POSTHOOK: Output: default@orc_test_txn@year=2016 +POSTHOOK: Output: default@orc_test_txn@year=2017 +POSTHOOK: Output: default@orc_test_txn@year=2018 +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).dept SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept, type:string, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).id SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).name SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).dept SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept, type:string, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).id SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).name SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).dept SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept, type:string, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).id SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).name SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: select * from orc_test_txn +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_test_txn +PREHOOK: Input: default@orc_test_txn@year=2016 +PREHOOK: Input: default@orc_test_txn@year=2017 +PREHOOK: Input: default@orc_test_txn@year=2018 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_test_txn +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_test_txn +POSTHOOK: Input: default@orc_test_txn@year=2016 +POSTHOOK: Input: default@orc_test_txn@year=2017 +POSTHOOK: Input: default@orc_test_txn@year=2018 +#### A masked pattern was here #### +9 Harris CSE 2017 +8 Henry CSE 2016 +10 Haley CSE 2018