Repository: hive Updated Branches: refs/heads/master e55ccd291 -> 8fca02e58
HIVE-20910 : Insert in bucketed table fails due to dynamic partition sort optimization (Vineet Garg via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan <hashut...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8fca02e5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8fca02e5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8fca02e5 Branch: refs/heads/master Commit: 8fca02e584533db77b7f1ea7a79bfba3e81ac1bd Parents: e55ccd2 Author: Vineet Garg <vg...@apache.org> Authored: Wed Nov 14 07:49:33 2018 -0800 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Wed Nov 14 07:49:33 2018 -0800 ---------------------------------------------------------------------- .../optimizer/SortedDynPartitionOptimizer.java | 8 ++ .../clientpositive/dynpart_sort_opt_bucketing.q | 22 ++++ .../dynpart_sort_opt_bucketing.q.out | 115 +++++++++++++++++++ 3 files changed, 145 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/8fca02e5/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index 4d9963a..ff35e8c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -387,6 +387,14 @@ public class SortedDynPartitionOptimizer extends Transform { rsChild.getSchema().getSignature().size()) { return false; } + // if child is select and contains expression which isn't column it shouldn't + // be removed because otherwise we will end up with different types/schema later + // while introducing select for RS + for(ExprNodeDesc expr: rsChild.getColumnExprMap().values()){ + if(!(expr instanceof ExprNodeColumnDesc)){ + return false; + } + } rsParent.getChildOperators().clear(); rsParent.getChildOperators().add(rsGrandChild); rsGrandChild.getParentOperators().clear(); http://git-wip-us.apache.org/repos/asf/hive/blob/8fca02e5/ql/src/test/queries/clientpositive/dynpart_sort_opt_bucketing.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_opt_bucketing.q b/ql/src/test/queries/clientpositive/dynpart_sort_opt_bucketing.q index f2f36d1..f54aeca 100644 --- a/ql/src/test/queries/clientpositive/dynpart_sort_opt_bucketing.q +++ b/ql/src/test/queries/clientpositive/dynpart_sort_opt_bucketing.q @@ -72,3 +72,25 @@ select 'bucket_6'; dfs -cat ${hiveconf:hive.metastore.warehouse.dir}/t1_n147/e=epart/000006_0; select 'bucket_8'; dfs -cat ${hiveconf:hive.metastore.warehouse.dir}/t1_n147/e=epart/000008_0; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.stats.autogather=false; + +CREATE TABLE dynpart_sort_opt_bucketing_test (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, + ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, + ca_zip string, ca_country string, ca_gmt_offset decimal(5,2)) + PARTITIONED BY (ca_location_type string) + CLUSTERED BY (ca_state) INTO 50 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true'); + +explain INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', + 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', + '-5', 'single family'); + +INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', + 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', + '-5', 'single family'); +DROP TABLE dynpart_sort_opt_bucketing_test; + + http://git-wip-us.apache.org/repos/asf/hive/blob/8fca02e5/ql/src/test/results/clientpositive/dynpart_sort_opt_bucketing.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/dynpart_sort_opt_bucketing.q.out b/ql/src/test/results/clientpositive/dynpart_sort_opt_bucketing.q.out index 21e4891..4bbf2a4 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_opt_bucketing.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_opt_bucketing.q.out @@ -237,3 +237,118 @@ fffbe3c110c390ec20218e5ad4a026ff515668ed55488b717319b556daa962a1002015-01-21 fffb1b226efc3cfaac8d73647ce4fa4e82413d67265fb55366ac3a4996518738012015-01-21 fff56191e39b15f0e2f04984c70152fb1bde2ecba52ff5a73b4c28bf4d58c017002015-01-21 fff4166378aa9d94cd4f8a9cd543375890a61b4f09a57dbfb31a66b33b3e3fd9\N\N2015-01-21 +PREHOOK: query: CREATE TABLE dynpart_sort_opt_bucketing_test (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, + ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, + ca_zip string, ca_country string, ca_gmt_offset decimal(5,2)) + PARTITIONED BY (ca_location_type string) + CLUSTERED BY (ca_state) INTO 50 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dynpart_sort_opt_bucketing_test +POSTHOOK: query: CREATE TABLE dynpart_sort_opt_bucketing_test (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, + ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, + ca_zip string, ca_country string, ca_gmt_offset decimal(5,2)) + PARTITIONED BY (ca_location_type string) + CLUSTERED BY (ca_state) INTO 50 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dynpart_sort_opt_bucketing_test +PREHOOK: query: explain INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', + 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', + '-5', 'single family') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dynpart_sort_opt_bucketing_test +POSTHOOK: query: explain INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', + 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', + '-5', 'single family') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(const struct(5555,'AAAAAAAADLFBAAAA','126','Highland Park','Court','Suite E','San Jose','King George County','VA','28003','United States','-5','single family')) (type: array<struct<col1:int,col2:string,col3:string,col4:string,col5:string,col6:string,col7:string,col8:string,col9:string,col10:string,col11:string,col12:string,col13:string>>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline + Select Operator + expressions: col1 (type: int), col2 (type: string), col3 (type: string), col4 (type: string), col5 (type: string), col6 (type: string), col7 (type: string), col8 (type: string), col9 (type: string), col10 (type: string), col11 (type: string), col12 (type: string), col13 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col8 (type: string) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: string), VALUE._col8 (type: string), VALUE._col9 (type: string), VALUE._col10 (type: string), CAST( VALUE._col11 AS decimal(5,2)) (type: decimal(5,2)), VALUE._col12 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dynpart_sort_opt_bucketing_test + Write Type: INSERT + + Stage: Stage-0 + Move Operator + tables: + partition: + ca_location_type + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dynpart_sort_opt_bucketing_test + Write Type: INSERT + +PREHOOK: query: INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', + 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', + '-5', 'single family') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dynpart_sort_opt_bucketing_test +POSTHOOK: query: INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126', + 'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George County', 'VA', '28003', 'United States', + '-5', 'single family') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dynpart_sort_opt_bucketing_test@ca_location_type=single family +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_address_id SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_address_sk SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_city SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_country SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_county SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_gmt_offset SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_state SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_street_name SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_street_number SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_street_type SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_suite_number SCRIPT [] +POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test PARTITION(ca_location_type=single family).ca_zip SCRIPT [] +PREHOOK: query: DROP TABLE dynpart_sort_opt_bucketing_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dynpart_sort_opt_bucketing_test +PREHOOK: Output: default@dynpart_sort_opt_bucketing_test +POSTHOOK: query: DROP TABLE dynpart_sort_opt_bucketing_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dynpart_sort_opt_bucketing_test +POSTHOOK: Output: default@dynpart_sort_opt_bucketing_test