[2/4] hive git commit: HIVE-16229 : Wrong result for correlated scalar subquery with aggregate (Vineet Garg via Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/112cbd19/ql/src/test/results/clientpositive/perf/query1.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/query1.q.out b/ql/src/test/results/clientpositive/perf/query1.q.out index 09278e3..53acdcd 100644 --- a/ql/src/test/results/clientpositive/perf/query1.q.out +++ b/ql/src/test/results/clientpositive/perf/query1.q.out @@ -47,131 +47,181 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 10 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 13 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 14 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 18 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 - File Output Operator [FS_54] -Limit [LIM_53] (rows=100 width=860) + Reducer 8 + File Output Operator [FS_82] +Limit [LIM_81] (rows=100 width=860) Number of rows:100 - Select Operator [SEL_52] (rows=3227 width=860) + Select Operator [SEL_80] (rows=35493335 width=860) Output:["_col0"] - <-Reducer 6 [SIMPLE_EDGE] -SHUFFLE [RS_51] - Select Operator [SEL_50] (rows=3227 width=860) + <-Reducer 7 [SIMPLE_EDGE] +SHUFFLE [RS_79] + Select Operator [SEL_78] (rows=35493335 width=860) Output:["_col0"] -Filter Operator 
[FIL_49] (rows=3227 width=860) - predicate:(_col2 > CASE WHEN (_col8 is null) THEN (null) ELSE (_col7) END) - Merge Join Operator [MERGEJOIN_78] (rows=9683 width=860) -Conds:RS_45._col1=RS_46._col2(Left Outer),Output:["_col2","_col6","_col7","_col8"] - <-Reducer 13 [SIMPLE_EDGE] -SHUFFLE [RS_46] +Filter Operator [FIL_77] (rows=35493335 width=860) + predicate:(_col2 > CASE WHEN (_col10 is null) THEN (null) ELSE (_col9) END) + Merge Join Operator [MERGEJOIN_114] (rows=106480005 width=860) +Conds:RS_74._col1=RS_75._col2(Left Outer),Output:["_col2","_col6","_col9","_col10"] + <-Reducer 18 [SIMPLE_EDGE] +SHUFFLE [RS_75] PartitionCols:_col2 - Select Operator [SEL_38] (rows=7918783 width=77) + Select Operator [SEL_73] (rows=7918783 width=77) Output:["_col0","_col1","_col2"] -Group By Operator [GBY_37] (rows=7918783 width=77) +Group By Operator [GBY_72] (rows=7918783 width=77) Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col1 - Select Operator [SEL_33] (rows=15837566 width=77) + Select Operator [SEL_68] (rows=15837566 width=77) Output:["_col1","_col2"] -Group By Operator [GBY_32] (rows=15837566 width=77) +Group By Operator [GBY_67] (rows=15837566 width=77) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 -<-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_31] +<-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_66] PartitionCols:_col0 -Group By Operator [GBY_30] (rows=31675133 width=77) +Group By Operator [GBY_65] (rows=31675133 width=77) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Select Operator [SEL_29] (rows=31675133 width=77) + Select Operator [SEL_64] (rows=31675133 width=77) Output:["_col2","_col1","_col3"] -Merge Join Operator [MERGEJOIN_77] (rows=31675133 width=77) -
[1/4] hive git commit: HIVE-16229 : Wrong result for correlated scalar subquery with aggregate (Vineet Garg via Ashutosh Chauhan)
Repository: hive Updated Branches: refs/heads/master 9c692a5c4 -> 112cbd19c http://git-wip-us.apache.org/repos/asf/hive/blob/112cbd19/ql/src/test/results/clientpositive/perf/query81.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/query81.q.out b/ql/src/test/results/clientpositive/perf/query81.q.out index 25bd68e..8234780 100644 --- a/ql/src/test/results/clientpositive/perf/query81.q.out +++ b/ql/src/test/results/clientpositive/perf/query81.q.out @@ -59,163 +59,228 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 16 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 7 <- Map 11 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 8 <- Map 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) +Reducer 16 <- Map 19 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 22 <- Map 25 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Reducer 22 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 23 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 8 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 9 <- Map 13 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 4 - File Output Operator [FS_67] -Limit [LIM_66] (rows=100 width=860) + Reducer 5 + 
File Output Operator [FS_101] +Limit [LIM_100] (rows=100 width=860) Number of rows:100 - Select Operator [SEL_65] (rows=3227 width=860) + Select Operator [SEL_99] (rows=35493335 width=860) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - <-Reducer 3 [SIMPLE_EDGE] -SHUFFLE [RS_64] - Select Operator [SEL_63] (rows=3227 width=860) + <-Reducer 4 [SIMPLE_EDGE] +SHUFFLE [RS_98] + Select Operator [SEL_97] (rows=35493335 width=860) Output:["_col0","_col1","_col11","_col12","_col13","_col14","_col15","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] -Filter Operator [FIL_62] (rows=3227 width=860) - predicate:(_col2 > CASE WHEN (_col22 is null) THEN (null) ELSE (_col21) END) - Select Operator [SEL_61] (rows=9683 width=860) - Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col11","_col12","_col13","_col14","_col16","_col18","_col19","_col20","_col21","_col22"] -Merge Join Operator [MERGEJOIN_105] (rows=9683 width=860) - Conds:RS_58._col0=RS_59._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17","_col20","_col21","_col22"] -<-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_59] -PartitionCols:_col0 -Merge Join Operator [MERGEJOIN_104] (rows=2420 width=1014) - Conds:RS_51._col1=RS_52._col2(Left Outer),Output:["_col0","_col2","_col3","_col4"] -<-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_52] -PartitionCols:_col2 -Select Operator [SEL_50] (rows=8711661 width=106) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_49] (rows=8711661 width=106) - Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col0 -Select Operator [SEL_45] (rows=17423323 width=106) - Output:["_col0","_col2"] - Group By Operator [GBY_44] (rows=17423323 width=106) +Filter Operator [FIL_96] (rows=35493335 width=860) + predicate:(_col2 > CASE WHEN (_col24 is null) THEN (null) ELSE (_col23) END) + 
Merge Join Operator [MERGEJOIN_153] (rows=106480005 width=860) +Conds:RS_93._col1=RS_94._col2(Left
[3/4] hive git commit: HIVE-16229 : Wrong result for correlated scalar subquery with aggregate (Vineet Garg via Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/112cbd19/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index da387d7..f6dc397 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -1681,9 +1681,10 @@ STAGE PLANS: Tez A masked pattern was here Edges: -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) A masked pattern was here Vertices: Map 1 @@ -1707,6 +1708,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator +predicate: (p_name = p_name) (type: boolean) +Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE +Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: COMPLETE +Execution mode: llap +LLAP IO: no inputs +Map 6 +Map Operator Tree: +TableScan + alias: part Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (p_name = p_name) (type: boolean) @@ -1735,16 +1756,18 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join0 to 1 + Left Outer Join0 to 2 keys: 0 _col0 (type: string) - 1 _col2 (type: string) -outputColumnNames: _col1, _col2, _col3 -Statistics: Num rows: 26 Data size: 312 
Basic stats: COMPLETE Column stats: COMPLETE + 1 _col0 (type: string) + 2 _col2 (type: string) +outputColumnNames: _col1, _col4, _col5 +Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((_col1 + 100) < CASE WHEN (_col3 is null) THEN (null) ELSE (_col2) END) (type: boolean) - Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((_col1 + 100) < CASE WHEN (_col5 is null) THEN (null) ELSE (_col4) END) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -1773,6 +1796,32 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator +keys: KEY._col0 (type: string) +mode: mergepartial +outputColumnNames: _col0 +Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: COMPLETE +Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator +predicate: (sq_count_check(_col1) <= 1) (type: boolean) +Statistics: Num rows: 2 Data size: 258 Basic stats: COMPLETE Column stats: COMPLETE +Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 +
[4/4] hive git commit: HIVE-16229 : Wrong result for correlated scalar subquery with aggregate (Vineet Garg via Ashutosh Chauhan)
HIVE-16229 : Wrong result for correlated scalar subquery with aggregate (Vineet Garg via Ashutosh Chauhan) Signed-off-by: Ashutosh ChauhanProject: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/112cbd19 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/112cbd19 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/112cbd19 Branch: refs/heads/master Commit: 112cbd19c96bbe298aa371e82ac867caca189b15 Parents: 9c692a5 Author: Vineet Garg Authored: Wed Mar 22 22:54:25 2017 -0700 Committer: Ashutosh Chauhan Committed: Wed Mar 22 22:54:25 2017 -0700 -- .../calcite/rules/HiveSubQueryRemoveRule.java | 38 +- .../subquery_scalar_corr_multi_rows.q |2 + .../subquery_scalar_corr_multi_rows.q.out |5 + .../clientpositive/llap/subquery_scalar.q.out | 1392 ++ .../results/clientpositive/perf/query1.q.out| 248 ++-- .../results/clientpositive/perf/query30.q.out | 341 +++-- .../results/clientpositive/perf/query6.q.out| 353 +++-- .../results/clientpositive/perf/query81.q.out | 341 +++-- 8 files changed, 1859 insertions(+), 861 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/112cbd19/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java index 76e0780..7c96f3d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java @@ -164,6 +164,25 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{ boolean isCorrScalarAgg) { switch (e.getKind()) { case SCALAR_QUERY: +builder.push(e.rel); +// returns single row/column +builder.aggregate(builder.groupKey(), +builder.count(false, "cnt")); + +SqlFunction countCheck = new 
SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, +InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, SqlFunctionCategory.USER_DEFINED_FUNCTION); + +// we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer +// ends up getting rid of Project since it is not used further up the tree + builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, +builder.call(countCheck, builder.field("cnt")), +builder.literal(1))); +if( !variablesSet.isEmpty()) +{ +builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); +} +else +builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); if(isCorrScalarAgg) { // Transformation : // Outer Query Left Join (inner query) on correlated predicate and preserve rows only from left side. @@ -193,26 +212,7 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{ //Transformation is to left join for correlated predicates and inner join otherwise, // but do a count on inner side before that to make sure it generates atmost 1 row. -builder.push(e.rel); -// returns single row/column -builder.aggregate(builder.groupKey(), -builder.count(false, "cnt")); - -SqlFunction countCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, -InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, SqlFunctionCategory.USER_DEFINED_FUNCTION); -// we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer -// ends up getting rid of Project since it is not used further up the tree - builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, -builder.call(countCheck, builder.field("cnt")), -builder.literal(1))); - -if( !variablesSet.isEmpty()) -{ -builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); -} -else -builder.join(JoinRelType.INNER, builder.literal(true),
[1/2] hive git commit: HIVE-15867: Add blobstore tests for import/export (Juan Rodríguez Hortalá, reviewed by Sergio Pena)
Repository: hive Updated Branches: refs/heads/master e434f8320 -> 9c692a5c4 http://git-wip-us.apache.org/repos/asf/hive/blob/9c692a5c/itests/hive-blobstore/src/test/results/clientpositive/import_blobstore_to_warehouse.q.out -- diff --git a/itests/hive-blobstore/src/test/results/clientpositive/import_blobstore_to_warehouse.q.out b/itests/hive-blobstore/src/test/results/clientpositive/import_blobstore_to_warehouse.q.out new file mode 100644 index 000..764c86d --- /dev/null +++ b/itests/hive-blobstore/src/test/results/clientpositive/import_blobstore_to_warehouse.q.out @@ -0,0 +1,157 @@ +PREHOOK: query: DROP TABLE exim_employee +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE exim_employee +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE exim_employee (emp_id int COMMENT "employee id") +COMMENT "employee table" +PARTITIONED BY (emp_country string COMMENT "two char iso code") +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@exim_employee +POSTHOOK: query: CREATE TABLE exim_employee (emp_id int COMMENT "employee id") +COMMENT "employee table" +PARTITIONED BY (emp_country string COMMENT "two char iso code") +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@exim_employee +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/test.dat" +INTO TABLE exim_employee PARTITION (emp_country="in") +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@exim_employee +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/test.dat" +INTO TABLE exim_employee PARTITION (emp_country="in") +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@exim_employee +POSTHOOK: Output: default@exim_employee@emp_country=in +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/test.dat" +INTO TABLE exim_employee PARTITION (emp_country="us") +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: 
default@exim_employee +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/test.dat" +INTO TABLE exim_employee PARTITION (emp_country="us") +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@exim_employee +POSTHOOK: Output: default@exim_employee@emp_country=us +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/test.dat" +INTO TABLE exim_employee PARTITION (emp_country="cz") +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@exim_employee +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/test.dat" +INTO TABLE exim_employee PARTITION (emp_country="cz") +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@exim_employee +POSTHOOK: Output: default@exim_employee@emp_country=cz +PREHOOK: query: DESCRIBE EXTENDED exim_employee +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@exim_employee +POSTHOOK: query: DESCRIBE EXTENDED exim_employee +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@exim_employee +emp_id int employee id +emp_countrystring two char iso code + +# Partition Information +# col_name data_type comment + +emp_countrystring two char iso code + + A masked pattern was here +PREHOOK: query: SELECT * FROM exim_employee +PREHOOK: type: QUERY +PREHOOK: Input: default@exim_employee +PREHOOK: Input: default@exim_employee@emp_country=cz +PREHOOK: Input: default@exim_employee@emp_country=in +PREHOOK: Input: default@exim_employee@emp_country=us + A masked pattern was here +POSTHOOK: query: SELECT * FROM exim_employee +POSTHOOK: type: QUERY +POSTHOOK: Input: default@exim_employee +POSTHOOK: Input: default@exim_employee@emp_country=cz +POSTHOOK: Input: default@exim_employee@emp_country=in +POSTHOOK: Input: default@exim_employee@emp_country=us + A masked pattern was here +1 cz +2 cz +3 cz +4 cz +5 cz +6 cz +1 in +2 in +3 in +4 in +5 in +6 in +1 us +2 us +3 us +4 us +5 us +6 us +PREHOOK: query: EXPORT TABLE exim_employee PARTITION (emp_country='us') +TO '### 
test.blobstore.path ###/import_blobstore_to_warehouse/export/exim_employee' +PREHOOK: type: EXPORT +PREHOOK: Input: default@exim_employee@emp_country=us +PREHOOK: Output: ### test.blobstore.path ###/import_blobstore_to_warehouse/export/exim_employee +POSTHOOK: query: EXPORT TABLE exim_employee PARTITION (emp_country='us') +TO '### test.blobstore.path ###/import_blobstore_to_warehouse/export/exim_employee' +POSTHOOK: type: EXPORT +POSTHOOK: Input: default@exim_employee@emp_country=us +POSTHOOK: Output: ### test.blobstore.path
[2/2] hive git commit: HIVE-15867: Add blobstore tests for import/export (Juan Rodríguez Hortalá, reviewed by Sergio Pena)
HIVE-15867: Add blobstore tests for import/export (Juan Rodríguez Hortalá, reviewed by Sergio Pena) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9c692a5c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9c692a5c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9c692a5c Branch: refs/heads/master Commit: 9c692a5c48f699d4a01a539093bc51cb4c03107d Parents: e434f83 Author: Juan Rodríguez Hortalá Authored: Wed Mar 22 17:32:02 2017 -0500 Committer: Sergio Pena Committed: Wed Mar 22 17:32:02 2017 -0500 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 2 +- ...import_addpartition_blobstore_to_blobstore.q | 45 +++ .../import_addpartition_blobstore_to_local.q| 44 +++ ...import_addpartition_blobstore_to_warehouse.q | 41 +++ .../import_addpartition_local_to_blobstore.q| 44 +++ .../import_blobstore_to_blobstore.q | 30 ++ .../import_blobstore_to_blobstore_nonpart.q | 25 ++ .../clientpositive/import_blobstore_to_local.q | 30 ++ .../import_blobstore_to_warehouse.q | 28 ++ .../import_blobstore_to_warehouse_nonpart.q | 23 ++ .../clientpositive/import_local_to_blobstore.q | 31 ++ .../src/test/resources/hive-site.xml| 5 + ...rt_addpartition_blobstore_to_blobstore.q.out | 283 +++ ...import_addpartition_blobstore_to_local.q.out | 283 +++ ...rt_addpartition_blobstore_to_warehouse.q.out | 271 ++ ...import_addpartition_local_to_blobstore.q.out | 277 ++ .../import_blobstore_to_blobstore.q.out | 161 +++ .../import_blobstore_to_blobstore_nonpart.q.out | 103 +++ .../import_blobstore_to_local.q.out | 161 +++ .../import_blobstore_to_warehouse.q.out | 157 ++ .../import_blobstore_to_warehouse_nonpart.q.out | 99 +++ .../import_local_to_blobstore.q.out | 159 +++ 22 files changed, 2301 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/9c692a5c/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 1b186f7..d4a0b2e 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2133,7 +2133,7 @@ public class HiveConf extends Configuration { "When true the HDFS location stored in the index file will be ignored at runtime.\n" + "If the data got moved or the name of the cluster got changed, the index data should still be usable."), -HIVE_EXIM_URI_SCHEME_WL("hive.exim.uri.scheme.whitelist", "hdfs,pfile,file", +HIVE_EXIM_URI_SCHEME_WL("hive.exim.uri.scheme.whitelist", "hdfs,pfile,file,s3,s3a", "A comma separated list of acceptable URI schemes for import and export."), // temporary variable for testing. This is added just to turn off this feature in case of a bug in // deployment. It has not been documented in hive-default.xml intentionally, this should be removed http://git-wip-us.apache.org/repos/asf/hive/blob/9c692a5c/itests/hive-blobstore/src/test/queries/clientpositive/import_addpartition_blobstore_to_blobstore.q -- diff --git a/itests/hive-blobstore/src/test/queries/clientpositive/import_addpartition_blobstore_to_blobstore.q b/itests/hive-blobstore/src/test/queries/clientpositive/import_addpartition_blobstore_to_blobstore.q new file mode 100644 index 000..8fee8ed --- /dev/null +++ b/itests/hive-blobstore/src/test/queries/clientpositive/import_addpartition_blobstore_to_blobstore.q @@ -0,0 +1,45 @@ +-- Check we can create a partitioned table in the warehouse, +-- export it to a blobstore, and then import its different partitions +-- using the blobstore as target location +DROP TABLE exim_employee; +CREATE TABLE exim_employee (emp_id int COMMENT "employee id") +COMMENT "employee table" +PARTITIONED BY (emp_country string COMMENT "two char iso code") +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH "../../data/files/test.dat" +INTO TABLE exim_employee PARTITION (emp_country="in"); +LOAD DATA LOCAL INPATH 
"../../data/files/test.dat" +INTO TABLE exim_employee PARTITION (emp_country="us"); +LOAD DATA LOCAL INPATH "../../data/files/test.dat" +INTO TABLE exim_employee PARTITION (emp_country="cz"); + +DESCRIBE EXTENDED exim_employee; +SELECT * FROM exim_employee; + +dfs -rm -r -f ${hiveconf:test.blobstore.path.unique}/import_addpartition_blobstore_to_blobstore/export/exim_employee; +EXPORT TABLE
hive git commit: HIVE-15691 Create StrictRegexWriter to work with RegexSerializer for Flume Hive Sink (Kalyan via Eugene Koifman)
Repository: hive Updated Branches: refs/heads/master 8613ef200 -> ea3be9549 HIVE-15691 Create StrictRegexWriter to work with RegexSerializer for Flume Hive Sink (Kalyan via Eugene Koifman) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ea3be954 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ea3be954 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ea3be954 Branch: refs/heads/master Commit: ea3be9549dca7eaed5e838bbcb69d2372817ce42 Parents: 8613ef2 Author: Eugene KoifmanAuthored: Wed Mar 22 13:22:08 2017 -0700 Committer: Eugene Koifman Committed: Wed Mar 22 13:22:08 2017 -0700 -- .../hcatalog/streaming/StrictRegexWriter.java | 188 +++ .../hive/hcatalog/streaming/TestStreaming.java | 81 +++- 2 files changed, 263 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/ea3be954/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java -- diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java new file mode 100644 index 000..78987ab --- /dev/null +++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java @@ -0,0 +1,188 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.hcatalog.streaming; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.AbstractSerDe; +import org.apache.hadoop.hive.serde2.RegexSerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.io.Text; + +/** + * Streaming Writer handles text input data with regex. 
Uses + * org.apache.hadoop.hive.serde2.RegexSerDe + */ +public class StrictRegexWriter extends AbstractRecordWriter { + private RegexSerDe serde; + private final StructObjectInspector recordObjInspector; + private final ObjectInspector[] bucketObjInspectors; + private final StructField[] bucketStructFields; + + /** + * @param endPoint the end point to write to + * @param conn connection this Writer is to be used with + * @throws ConnectionError + * @throws SerializationError + * @throws StreamingException + */ + public StrictRegexWriter(HiveEndPoint endPoint, StreamingConnection conn) + throws ConnectionError, SerializationError, StreamingException { +this(null, endPoint, null, conn); + } + + /** + * @param endPoint the end point to write to + * @param conf a Hive conf object. Should be null if not using advanced Hive settings. + * @param conn connection this Writer is to be used with + * @throws ConnectionError + * @throws SerializationError + * @throws StreamingException + */ + public StrictRegexWriter(HiveEndPoint endPoint, HiveConf conf, StreamingConnection conn) + throws ConnectionError, SerializationError, StreamingException { +this(null, endPoint, conf, conn); + } + + /** + * @param regex to parse the data + * @param endPoint the end point to write to + * @param conf a Hive conf object. Should be null if not using advanced Hive settings. + * @param conn connection this Writer is to be used with + * @throws ConnectionError + *
hive git commit: HIVE-16107: JDBC: HttpClient should retry one more time on NoHttpResponseException (Vaibhav Gumashta reviewed by Daniel Dai, Thejas Nair)
Repository: hive Updated Branches: refs/heads/master ce695b5d4 -> 8613ef200 HIVE-16107: JDBC: HttpClient should retry one more time on NoHttpResponseException (Vaibhav Gumashta reviewed by Daniel Dai, Thejas Nair) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8613ef20 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8613ef20 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8613ef20 Branch: refs/heads/master Commit: 8613ef200fb1e1372f41a225bd358f06e754f906 Parents: ce695b5 Author: Vaibhav GumashtaAuthored: Wed Mar 22 11:02:23 2017 -0700 Committer: Vaibhav Gumashta Committed: Wed Mar 22 11:02:23 2017 -0700 -- .../apache/hive/jdbc/TestJdbcWithMiniHS2.java | 32 .../org/apache/hive/jdbc/HiveConnection.java| 24 +-- 2 files changed, 53 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/8613ef20/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java -- diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java index afe23f8..3780b4e 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java @@ -966,6 +966,38 @@ public class TestJdbcWithMiniHS2 { } /** + * Test for jdbc driver retry on NoHttpResponseException + * @throws Exception + */ + @Test + public void testHttpRetryOnServerIdleTimeout() throws Exception { +// Stop HiveServer2 +stopMiniHS2(); +HiveConf conf = new HiveConf(); +conf.set("hive.server2.transport.mode", "http"); +// Set server's idle timeout to a very low value +conf.set("hive.server2.thrift.http.max.idle.time", "5"); +startMiniHS2(conf); +String userName = System.getProperty("user.name"); +Connection conn = getConnection(miniHS2.getJdbcURL(testDbName), userName, "password"); +Statement stmt = 
conn.createStatement(); +stmt.execute("select from_unixtime(unix_timestamp())"); +// Sleep for longer than server's idletimeout and execute a query +TimeUnit.SECONDS.sleep(10); +try { + stmt.execute("select from_unixtime(unix_timestamp())"); +} catch (Exception e) { + fail("Not expecting exception: " + e); +} finally { + if (conn != null) { +conn.close(); + } +} +// Restore original state +restoreMiniHS2AndConnections(); + } + + /** * Tests that DataNucleus' NucleusContext.classLoaderResolverMap clears cached class objects * (& hence doesn't leak classloaders) on closing any session * http://git-wip-us.apache.org/repos/asf/hive/blob/8613ef20/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java -- diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java b/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java index 1695c5d..fb18adb 100644 --- a/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java +++ b/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java @@ -41,6 +41,7 @@ import org.apache.hive.service.rpc.thrift.TSessionHandle; import org.apache.http.HttpRequestInterceptor; import org.apache.http.HttpResponse; import org.apache.http.client.CookieStore; +import org.apache.http.client.HttpRequestRetryHandler; import org.apache.http.client.ServiceUnavailableRetryStrategy; import org.apache.http.config.Registry; import org.apache.http.config.RegistryBuilder; @@ -386,9 +387,9 @@ public class HiveConnection implements java.sql.Connection { * Add an interceptor to pass username/password in the header. 
* In https mode, the entire information is encrypted */ - requestInterceptor = new HttpBasicAuthInterceptor(getUserName(), getPassword(), -cookieStore, cookieName, useSsl, -additionalHttpHeaders); +requestInterceptor = +new HttpBasicAuthInterceptor(getUserName(), getPassword(), cookieStore, cookieName, +useSsl, additionalHttpHeaders); } } // Configure http client for cookie based authentication @@ -421,6 +422,23 @@ public class HiveConnection implements java.sql.Connection { } else { httpClientBuilder = HttpClientBuilder.create(); } +// In case the server's idletimeout is set to a lower value, it might close it's side of +// connection. However we retry one more time on NoHttpResponseException +
[3/4] hive git commit: HIVE-15784: Vectorization: Turn on text vectorization by default (vector serde) (Matt McCline, reviewed by Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/ce695b5d/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 0182a46..c534cb5 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -76,12 +76,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE -Execution mode: llap +Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: -enabled: false -enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false +enabled: true +enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true +groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat +allNative: false +usesVectorUDFAdaptor: false +vectorized: true Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -249,12 +253,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE -Execution mode: llap +Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: -enabled: false -enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false +enabled: true +enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true +groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat +allNative: true +usesVectorUDFAdaptor: false +vectorized: true Map 4 Map Operator Tree: TableScan @@ -401,12 +409,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data 
size: 368000 Basic stats: COMPLETE Column stats: COMPLETE -Execution mode: llap +Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: -enabled: false -enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false +enabled: true +enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true +groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat +allNative: true +usesVectorUDFAdaptor: false +vectorized: true Map 4 Map Operator Tree: TableScan @@ -554,12 +566,16 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) -Execution mode: llap +Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: -enabled: false -enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false +enabled: true +enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true +groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat +allNative: true +usesVectorUDFAdaptor: false +vectorized: true Map 5 Map Operator Tree: TableScan @@ -773,12 +789,16 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) -Execution mode: llap +Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: -enabled: false -
[2/4] hive git commit: HIVE-15784: Vectorization: Turn on text vectorization by default (vector serde) (Matt McCline, reviewed by Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/ce695b5d/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 7687cff..dc80037 100644 --- a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -29,21 +29,40 @@ STAGE PLANS: TableScan alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] Filter Operator +Filter Vectorization: +className: VectorFilterOperator +native: true +predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 3, val 1) -> boolean, SelectColumnIsNotNull(col 1) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator +Spark Hash Table Sink Vectorization: +className: VectorSparkHashTableSinkOperator +native: true keys: 0 _col0 (type: int) 1 _col1 (type: int) +Execution mode: vectorized Map Vectorization: -enabled: false -enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false +enabled: true +enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true +groupByVectorOutput: true inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat +allNative: true +usesVectorUDFAdaptor: false +vectorized: true Local Work: Map Reduce Local Work Map 4 @@ -51,30 +70,51 @@ STAGE PLANS: TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] Filter Operator +Filter Vectorization: +className: VectorFilterOperator +native: true +predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 14, val AIR) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: -vectorOutput: false +className: VectorGroupByOperator +vectorOutput: true +keyExpressions: col 0 native: false -projectedOutputColumns: null +projectedOutputColumns: [] keys: _col0 (type: int) mode: hash
[4/4] hive git commit: HIVE-15784: Vectorization: Turn on text vectorization by default (vector serde) (Matt McCline, reviewed by Sergey Shelukhin)
HIVE-15784: Vectorization: Turn on text vectorization by default (vector serde) (Matt McCline, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ce695b5d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ce695b5d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ce695b5d Branch: refs/heads/master Commit: ce695b5d4ae07d0bfc79fb88fcb09cb99e9e4706 Parents: 9f5a3e3 Author: Matt McCline Authored: Wed Mar 22 03:06:34 2017 -0500 Committer: Matt McCline Committed: Wed Mar 22 03:06:34 2017 -0500 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 +- .../hive/ql/optimizer/physical/Vectorizer.java | 1 + .../clientpositive/vector_groupby_mapjoin.q | 14 + .../clientpositive/vectorized_parquet_types.q | 5 +- .../llap/dynpart_sort_opt_vectorization.q.out | 4 +- .../llap/dynpart_sort_optimization2.q.out | 4 +- .../results/clientpositive/llap/mergejoin.q.out | 8 +- .../clientpositive/llap/tez_join_hash.q.out | 10 +- .../llap/tez_vector_dynpart_hashjoin_2.q.out| 8 +- .../clientpositive/llap/vector_bucket.q.out | 21 +- .../llap/vector_decimal_round.q.out | 44 +- .../llap/vector_groupby_mapjoin.q.out | 133 +- .../llap/vector_mapjoin_reduce.q.out| 167 ++- .../llap/vector_udf_character_length.q.out | 2 +- .../llap/vector_udf_octet_length.q.out | 2 +- .../llap/vectorized_bucketmapjoin1.q.out| 21 +- .../vectorized_dynamic_partition_pruning.q.out | 470 +-- .../clientpositive/llap/vectorized_join46.q.out | 58 +-- .../llap/vectorized_parquet_types.q.out | 189 ++-- .../test/results/clientpositive/mergejoin.q.out | 11 + .../spark/vector_mapjoin_reduce.q.out | 157 ++- .../test/results/clientpositive/structin.q.out | 1 + .../clientpositive/tez/explainuser_3.q.out | 20 +- .../results/clientpositive/tez_join_hash.q.out | 5 + .../results/clientpositive/vector_bucket.q.out | 21 +- .../clientpositive/vector_cast_constant.q.out | 17 +- .../results/clientpositive/vector_char_2.q.out | 34 +- 
.../clientpositive/vector_decimal_round.q.out | 43 +- .../clientpositive/vector_groupby4.q.out| 17 +- .../clientpositive/vector_groupby6.q.out| 17 +- .../clientpositive/vector_groupby_mapjoin.q.out | 165 ++- .../clientpositive/vector_groupby_reduce.q.out | 51 +- .../clientpositive/vector_mapjoin_reduce.q.out | 112 - .../vector_mr_diff_schema_alias.q.out | 18 +- .../clientpositive/vector_orderby_5.q.out | 17 +- .../vector_reduce_groupby_decimal.q.out | 17 +- .../clientpositive/vector_string_concat.q.out | 17 +- .../vector_tablesample_rows.q.out | 17 +- .../vector_udf_character_length.q.out | 1 + .../vector_udf_octet_length.q.out | 1 + .../clientpositive/vectorization_13.q.out | 34 +- .../clientpositive/vectorization_14.q.out | 9 +- .../clientpositive/vectorization_15.q.out | 9 +- .../clientpositive/vectorization_limit.q.out| 17 +- .../clientpositive/vectorized_date_funcs.q.out | 17 +- .../vectorized_parquet_types.q.out | 17 +- .../clientpositive/vectorized_shufflejoin.q.out | 23 +- 47 files changed, 1658 insertions(+), 392 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/ce695b5d/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 62908f9..1b186f7 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2751,9 +2751,9 @@ public class HiveConf extends Configuration { HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT("hive.vectorized.use.vectorized.input.format", true, "This flag should be set to true to enable vectorizing with vectorized input file format capable SerDe.\n" + "The default value is true."), - HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE("hive.vectorized.use.vector.serde.deserialize", false, + HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE("hive.vectorized.use.vector.serde.deserialize", true, "This flag should be set to 
true to enable vectorizing rows using vector deserialize.\n" + -"The default value is false."), +"The default value is true."), HIVE_VECTORIZATION_USE_ROW_DESERIALIZE("hive.vectorized.use.row.serde.deserialize", false, "This flag should be set to true to enable vectorizing using
[1/4] hive git commit: HIVE-15784: Vectorization: Turn on text vectorization by default (vector serde) (Matt McCline, reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/master 9f5a3e3d8 -> ce695b5d4 http://git-wip-us.apache.org/repos/asf/hive/blob/ce695b5d/ql/src/test/results/clientpositive/vectorization_14.q.out -- diff --git a/ql/src/test/results/clientpositive/vectorization_14.q.out b/ql/src/test/results/clientpositive/vectorization_14.q.out index 775c3ef..ec4f7cd 100644 --- a/ql/src/test/results/clientpositive/vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/vectorization_14.q.out @@ -139,10 +139,15 @@ STAGE PLANS: sort order: Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) + Execution mode: vectorized Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true http://git-wip-us.apache.org/repos/asf/hive/blob/ce695b5d/ql/src/test/results/clientpositive/vectorization_15.q.out -- diff --git a/ql/src/test/results/clientpositive/vectorization_15.q.out b/ql/src/test/results/clientpositive/vectorization_15.q.out index 35667db..12d8141 100644 --- a/ql/src/test/results/clientpositive/vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/vectorization_15.q.out @@ -135,10 +135,15 @@ STAGE PLANS: sort 
order: +++ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) + Execution mode: vectorized Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true http://git-wip-us.apache.org/repos/asf/hive/blob/ce695b5d/ql/src/test/results/clientpositive/vectorization_limit.q.out -- diff --git a/ql/src/test/results/clientpositive/vectorization_limit.q.out b/ql/src/test/results/clientpositive/vectorization_limit.q.out index 3ea3564..a9db0d0 100644 --- a/ql/src/test/results/clientpositive/vectorization_limit.q.out +++ b/ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -677,15 +677,28 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan +TableScan Vectorization: +native: true +projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: _col1 (type: bigint), _col0 (type: double) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false Statistics: Num 
rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 + Execution mode: vectorized Map Vectorization: - enabled: false -