Repository: hive
Updated Branches:
  refs/heads/master e55ccd291 -> 8fca02e58


HIVE-20910 : Insert in bucketed table fails due to dynamic partition sort 
optimization (Vineet Garg via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <hashut...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8fca02e5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8fca02e5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8fca02e5

Branch: refs/heads/master
Commit: 8fca02e584533db77b7f1ea7a79bfba3e81ac1bd
Parents: e55ccd2
Author: Vineet Garg <vg...@apache.org>
Authored: Wed Nov 14 07:49:33 2018 -0800
Committer: Ashutosh Chauhan <hashut...@apache.org>
Committed: Wed Nov 14 07:49:33 2018 -0800

----------------------------------------------------------------------
 .../optimizer/SortedDynPartitionOptimizer.java  |   8 ++
 .../clientpositive/dynpart_sort_opt_bucketing.q |  22 ++++
 .../dynpart_sort_opt_bucketing.q.out            | 115 +++++++++++++++++++
 3 files changed, 145 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8fca02e5/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
index 4d9963a..ff35e8c 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
@@ -387,6 +387,14 @@ public class SortedDynPartitionOptimizer extends Transform 
{
               rsChild.getSchema().getSignature().size()) {
             return false;
           }
+          // if child is select and contains expression which isn't column it 
shouldn't
+          // be removed because otherwise we will end up with different 
types/schema later
+          // while introducing select for RS
+          for(ExprNodeDesc expr: rsChild.getColumnExprMap().values()){
+            if(!(expr instanceof ExprNodeColumnDesc)){
+              return false;
+            }
+          }
           rsParent.getChildOperators().clear();
           rsParent.getChildOperators().add(rsGrandChild);
           rsGrandChild.getParentOperators().clear();

http://git-wip-us.apache.org/repos/asf/hive/blob/8fca02e5/ql/src/test/queries/clientpositive/dynpart_sort_opt_bucketing.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_opt_bucketing.q 
b/ql/src/test/queries/clientpositive/dynpart_sort_opt_bucketing.q
index f2f36d1..f54aeca 100644
--- a/ql/src/test/queries/clientpositive/dynpart_sort_opt_bucketing.q
+++ b/ql/src/test/queries/clientpositive/dynpart_sort_opt_bucketing.q
@@ -72,3 +72,25 @@ select 'bucket_6';
 dfs -cat ${hiveconf:hive.metastore.warehouse.dir}/t1_n147/e=epart/000006_0;
 select 'bucket_8';
 dfs -cat ${hiveconf:hive.metastore.warehouse.dir}/t1_n147/e=epart/000008_0;
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.stats.autogather=false;
+
+CREATE TABLE dynpart_sort_opt_bucketing_test (ca_address_sk int, ca_address_id 
string, ca_street_number string, ca_street_name string,
+    ca_street_type string, ca_suite_number string, ca_city string, ca_county 
string, ca_state string,
+    ca_zip string, ca_country string, ca_gmt_offset decimal(5,2))
+    PARTITIONED BY (ca_location_type string)
+    CLUSTERED BY (ca_state) INTO 50 BUCKETS STORED AS ORC 
TBLPROPERTIES('transactional'='true');
+
+explain INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION 
(ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126',
+                'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George 
County', 'VA', '28003', 'United States',
+                '-5', 'single family');
+
+INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION (ca_location_type) 
VALUES (5555, 'AAAAAAAADLFBAAAA', '126',
+                'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George 
County', 'VA', '28003', 'United States',
+                '-5', 'single family');
+DROP TABLE dynpart_sort_opt_bucketing_test;
+
+

http://git-wip-us.apache.org/repos/asf/hive/blob/8fca02e5/ql/src/test/results/clientpositive/dynpart_sort_opt_bucketing.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/dynpart_sort_opt_bucketing.q.out 
b/ql/src/test/results/clientpositive/dynpart_sort_opt_bucketing.q.out
index 21e4891..4bbf2a4 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_opt_bucketing.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_opt_bucketing.q.out
@@ -237,3 +237,118 @@ 
fffbe3c110c390ec20218e5ad4a026ff515668ed55488b717319b556daa962a1002015-01-21
 fffb1b226efc3cfaac8d73647ce4fa4e82413d67265fb55366ac3a4996518738012015-01-21
 fff56191e39b15f0e2f04984c70152fb1bde2ecba52ff5a73b4c28bf4d58c017002015-01-21
 
fff4166378aa9d94cd4f8a9cd543375890a61b4f09a57dbfb31a66b33b3e3fd9\N\N2015-01-21
+PREHOOK: query: CREATE TABLE dynpart_sort_opt_bucketing_test (ca_address_sk 
int, ca_address_id string, ca_street_number string, ca_street_name string,
+    ca_street_type string, ca_suite_number string, ca_city string, ca_county 
string, ca_state string,
+    ca_zip string, ca_country string, ca_gmt_offset decimal(5,2))
+    PARTITIONED BY (ca_location_type string)
+    CLUSTERED BY (ca_state) INTO 50 BUCKETS STORED AS ORC 
TBLPROPERTIES('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dynpart_sort_opt_bucketing_test
+POSTHOOK: query: CREATE TABLE dynpart_sort_opt_bucketing_test (ca_address_sk 
int, ca_address_id string, ca_street_number string, ca_street_name string,
+    ca_street_type string, ca_suite_number string, ca_city string, ca_county 
string, ca_state string,
+    ca_zip string, ca_country string, ca_gmt_offset decimal(5,2))
+    PARTITIONED BY (ca_location_type string)
+    CLUSTERED BY (ca_state) INTO 50 BUCKETS STORED AS ORC 
TBLPROPERTIES('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dynpart_sort_opt_bucketing_test
+PREHOOK: query: explain INSERT INTO TABLE dynpart_sort_opt_bucketing_test 
PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126',
+                'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George 
County', 'VA', '28003', 'United States',
+                '-5', 'single family')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dynpart_sort_opt_bucketing_test
+POSTHOOK: query: explain INSERT INTO TABLE dynpart_sort_opt_bucketing_test 
PARTITION (ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126',
+                'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George 
County', 'VA', '28003', 'United States',
+                '-5', 'single family')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: _dummy_table
+            Row Limit Per Split: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column 
stats: COMPLETE
+            Select Operator
+              expressions: array(const 
struct(5555,'AAAAAAAADLFBAAAA','126','Highland Park','Court','Suite E','San 
Jose','King George County','VA','28003','United States','-5','single family')) 
(type: 
array<struct<col1:int,col2:string,col3:string,col4:string,col5:string,col6:string,col7:string,col8:string,col9:string,col10:string,col11:string,col12:string,col13:string>>)
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE 
Column stats: COMPLETE
+              UDTF Operator
+                Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE 
Column stats: COMPLETE
+                function name: inline
+                Select Operator
+                  expressions: col1 (type: int), col2 (type: string), col3 
(type: string), col4 (type: string), col5 (type: string), col6 (type: string), 
col7 (type: string), col8 (type: string), col9 (type: string), col10 (type: 
string), col11 (type: string), col12 (type: string), col13 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Map-reduce partition columns: _col8 (type: string)
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    value expressions: _col0 (type: int), _col1 (type: 
string), _col2 (type: string), _col3 (type: string), _col4 (type: string), 
_col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: 
string), _col9 (type: string), _col10 (type: string), _col11 (type: string), 
_col12 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), 
VALUE._col2 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: 
string), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 
(type: string), VALUE._col8 (type: string), VALUE._col9 (type: string), 
VALUE._col10 (type: string), CAST( VALUE._col11 AS decimal(5,2)) (type: 
decimal(5,2)), VALUE._col12 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8, _col9, _col10, _col11, _col12
+          Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column 
stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE 
Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                name: default.dynpart_sort_opt_bucketing_test
+            Write Type: INSERT
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ca_location_type 
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.dynpart_sort_opt_bucketing_test
+          Write Type: INSERT
+
+PREHOOK: query: INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION 
(ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126',
+                'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George 
County', 'VA', '28003', 'United States',
+                '-5', 'single family')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dynpart_sort_opt_bucketing_test
+POSTHOOK: query: INSERT INTO TABLE dynpart_sort_opt_bucketing_test PARTITION 
(ca_location_type) VALUES (5555, 'AAAAAAAADLFBAAAA', '126',
+                'Highland Park', 'Court', 'Suite E', 'San Jose', 'King George 
County', 'VA', '28003', 'United States',
+                '-5', 'single family')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: 
default@dynpart_sort_opt_bucketing_test@ca_location_type=single family
+POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test 
PARTITION(ca_location_type=single family).ca_address_id SCRIPT []
+POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test 
PARTITION(ca_location_type=single family).ca_address_sk SCRIPT []
+POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test 
PARTITION(ca_location_type=single family).ca_city SCRIPT []
+POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test 
PARTITION(ca_location_type=single family).ca_country SCRIPT []
+POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test 
PARTITION(ca_location_type=single family).ca_county SCRIPT []
+POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test 
PARTITION(ca_location_type=single family).ca_gmt_offset SCRIPT []
+POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test 
PARTITION(ca_location_type=single family).ca_state SCRIPT []
+POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test 
PARTITION(ca_location_type=single family).ca_street_name SCRIPT []
+POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test 
PARTITION(ca_location_type=single family).ca_street_number SCRIPT []
+POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test 
PARTITION(ca_location_type=single family).ca_street_type SCRIPT []
+POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test 
PARTITION(ca_location_type=single family).ca_suite_number SCRIPT []
+POSTHOOK: Lineage: dynpart_sort_opt_bucketing_test 
PARTITION(ca_location_type=single family).ca_zip SCRIPT []
+PREHOOK: query: DROP TABLE dynpart_sort_opt_bucketing_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dynpart_sort_opt_bucketing_test
+PREHOOK: Output: default@dynpart_sort_opt_bucketing_test
+POSTHOOK: query: DROP TABLE dynpart_sort_opt_bucketing_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dynpart_sort_opt_bucketing_test
+POSTHOOK: Output: default@dynpart_sort_opt_bucketing_test

Reply via email to