[2/4] hive git commit: HIVE-16229 : Wrong result for correlated scalar subquery with aggregate (Vineet Garg via Ashutosh Chauhan)

2017-03-22 Thread hashutosh
http://git-wip-us.apache.org/repos/asf/hive/blob/112cbd19/ql/src/test/results/clientpositive/perf/query1.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/query1.q.out b/ql/src/test/results/clientpositive/perf/query1.q.out
index 09278e3..53acdcd 100644
--- a/ql/src/test/results/clientpositive/perf/query1.q.out
+++ b/ql/src/test/results/clientpositive/perf/query1.q.out
@@ -47,131 +47,181 @@ POSTHOOK: type: QUERY
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE)
-Reducer 13 <- Reducer 12 (SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
+Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE)
+Reducer 14 <- Reducer 13 (SIMPLE_EDGE)
+Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE)
+Reducer 18 <- Reducer 17 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
 Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Map 10 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
-Reducer 6 <- Reducer 13 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
-Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 14 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 18 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
 limit:100
 Stage-1
-  Reducer 7
-  File Output Operator [FS_54]
-Limit [LIM_53] (rows=100 width=860)
+  Reducer 8
+  File Output Operator [FS_82]
+Limit [LIM_81] (rows=100 width=860)
   Number of rows:100
-  Select Operator [SEL_52] (rows=3227 width=860)
+  Select Operator [SEL_80] (rows=35493335 width=860)
 Output:["_col0"]
-  <-Reducer 6 [SIMPLE_EDGE]
-SHUFFLE [RS_51]
-  Select Operator [SEL_50] (rows=3227 width=860)
+  <-Reducer 7 [SIMPLE_EDGE]
+SHUFFLE [RS_79]
+  Select Operator [SEL_78] (rows=35493335 width=860)
 Output:["_col0"]
-Filter Operator [FIL_49] (rows=3227 width=860)
-  predicate:(_col2 > CASE WHEN (_col8 is null) THEN (null) ELSE (_col7) END)
-  Merge Join Operator [MERGEJOIN_78] (rows=9683 width=860)
-Conds:RS_45._col1=RS_46._col2(Left Outer),Output:["_col2","_col6","_col7","_col8"]
-  <-Reducer 13 [SIMPLE_EDGE]
-SHUFFLE [RS_46]
+Filter Operator [FIL_77] (rows=35493335 width=860)
+  predicate:(_col2 > CASE WHEN (_col10 is null) THEN (null) ELSE (_col9) END)
+  Merge Join Operator [MERGEJOIN_114] (rows=106480005 width=860)
+Conds:RS_74._col1=RS_75._col2(Left Outer),Output:["_col2","_col6","_col9","_col10"]
+  <-Reducer 18 [SIMPLE_EDGE]
+SHUFFLE [RS_75]
   PartitionCols:_col2
-  Select Operator [SEL_38] (rows=7918783 width=77)
+  Select Operator [SEL_73] (rows=7918783 width=77)
 Output:["_col0","_col1","_col2"]
-Group By Operator [GBY_37] (rows=7918783 width=77)
+Group By Operator [GBY_72] (rows=7918783 width=77)
   Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col1
-  Select Operator [SEL_33] (rows=15837566 width=77)
+  Select Operator [SEL_68] (rows=15837566 width=77)
 Output:["_col1","_col2"]
-Group By Operator [GBY_32] (rows=15837566 width=77)
+Group By Operator [GBY_67] (rows=15837566 width=77)
   Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1
-<-Reducer 12 [SIMPLE_EDGE]
-  SHUFFLE [RS_31]
+<-Reducer 17 [SIMPLE_EDGE]
+  SHUFFLE [RS_66]
 PartitionCols:_col0
-Group By Operator [GBY_30] (rows=31675133 width=77)
+Group By Operator [GBY_65] (rows=31675133 width=77)
   Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1
-  Select Operator [SEL_29] (rows=31675133 width=77)
+  Select Operator [SEL_64] (rows=31675133 width=77)
 Output:["_col2","_col1","_col3"]
-Merge Join Operator [MERGEJOIN_77] (rows=31675133 width=77)
- 

[1/4] hive git commit: HIVE-16229 : Wrong result for correlated scalar subquery with aggregate (Vineet Garg via Ashutosh Chauhan)

2017-03-22 Thread hashutosh
Repository: hive
Updated Branches:
  refs/heads/master 9c692a5c4 -> 112cbd19c


http://git-wip-us.apache.org/repos/asf/hive/blob/112cbd19/ql/src/test/results/clientpositive/perf/query81.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/query81.q.out b/ql/src/test/results/clientpositive/perf/query81.q.out
index 25bd68e..8234780 100644
--- a/ql/src/test/results/clientpositive/perf/query81.q.out
+++ b/ql/src/test/results/clientpositive/perf/query81.q.out
@@ -59,163 +59,228 @@ POSTHOOK: type: QUERY
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 10 <- Reducer 16 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
-Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE)
-Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE)
-Reducer 16 <- Reducer 15 (SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
-Reducer 7 <- Map 11 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
-Reducer 8 <- Map 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
-Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
+Reducer 10 <- Reducer 9 (SIMPLE_EDGE)
+Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE)
+Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE)
+Reducer 16 <- Map 19 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE)
+Reducer 17 <- Reducer 16 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE)
+Reducer 22 <- Map 25 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE)
+Reducer 23 <- Reducer 22 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 23 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+Reducer 8 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+Reducer 9 <- Map 13 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
 limit:100
 Stage-1
-  Reducer 4
-  File Output Operator [FS_67]
-Limit [LIM_66] (rows=100 width=860)
+  Reducer 5
+  File Output Operator [FS_101]
+Limit [LIM_100] (rows=100 width=860)
   Number of rows:100
-  Select Operator [SEL_65] (rows=3227 width=860)
+  Select Operator [SEL_99] (rows=35493335 width=860)
 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"]
-  <-Reducer 3 [SIMPLE_EDGE]
-SHUFFLE [RS_64]
-  Select Operator [SEL_63] (rows=3227 width=860)
+  <-Reducer 4 [SIMPLE_EDGE]
+SHUFFLE [RS_98]
+  Select Operator [SEL_97] (rows=35493335 width=860)
 Output:["_col0","_col1","_col11","_col12","_col13","_col14","_col15","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
-Filter Operator [FIL_62] (rows=3227 width=860)
-  predicate:(_col2 > CASE WHEN (_col22 is null) THEN (null) ELSE (_col21) END)
-  Select Operator [SEL_61] (rows=9683 width=860)
-Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col11","_col12","_col13","_col14","_col16","_col18","_col19","_col20","_col21","_col22"]
-Merge Join Operator [MERGEJOIN_105] (rows=9683 width=860)
-  Conds:RS_58._col0=RS_59._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17","_col20","_col21","_col22"]
-<-Reducer 10 [SIMPLE_EDGE]
-  SHUFFLE [RS_59]
-PartitionCols:_col0
-Merge Join Operator [MERGEJOIN_104] (rows=2420 width=1014)
-  Conds:RS_51._col1=RS_52._col2(Left Outer),Output:["_col0","_col2","_col3","_col4"]
-<-Reducer 16 [SIMPLE_EDGE]
-  SHUFFLE [RS_52]
-PartitionCols:_col2
-Select Operator [SEL_50] (rows=8711661 width=106)
-  Output:["_col0","_col1","_col2"]
-  Group By Operator [GBY_49] (rows=8711661 width=106)
-Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col0
-Select Operator [SEL_45] (rows=17423323 width=106)
-  Output:["_col0","_col2"]
-  Group By Operator [GBY_44] (rows=17423323 width=106)
+Filter Operator [FIL_96] (rows=35493335 width=860)
+  predicate:(_col2 > CASE WHEN (_col24 is null) THEN (null) ELSE (_col23) END)
+  Merge Join Operator [MERGEJOIN_153] (rows=106480005 width=860)
+Conds:RS_93._col1=RS_94._col2(Left 

[3/4] hive git commit: HIVE-16229 : Wrong result for correlated scalar subquery with aggregate (Vineet Garg via Ashutosh Chauhan)

2017-03-22 Thread hashutosh
http://git-wip-us.apache.org/repos/asf/hive/blob/112cbd19/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
index da387d7..f6dc397 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
@@ -1681,9 +1681,10 @@ STAGE PLANS:
 Tez
  A masked pattern was here 
   Edges:
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
 Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 Reducer 5 <- Map 4 (SIMPLE_EDGE)
+Reducer 7 <- Map 6 (SIMPLE_EDGE)
  A masked pattern was here 
   Vertices:
 Map 1 
@@ -1707,6 +1708,26 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
   alias: part
+  Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE
+  Filter Operator
+predicate: (p_name = p_name) (type: boolean)
+Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE
+Group By Operator
+  keys: p_name (type: string)
+  mode: hash
+  outputColumnNames: _col0
+  Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: COMPLETE
+  Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: COMPLETE
+Execution mode: llap
+LLAP IO: no inputs
+Map 6 
+Map Operator Tree:
+TableScan
+  alias: part
  Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
  Filter Operator
predicate: (p_name = p_name) (type: boolean)
@@ -1735,16 +1756,18 @@ STAGE PLANS:
  Merge Join Operator
condition map:
 Left Outer Join0 to 1
+ Left Outer Join0 to 2
keys:
  0 _col0 (type: string)
-  1 _col2 (type: string)
-outputColumnNames: _col1, _col2, _col3
-Statistics: Num rows: 26 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE
+  1 _col0 (type: string)
+  2 _col2 (type: string)
+outputColumnNames: _col1, _col4, _col5
+Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
-  predicate: ((_col1 + 100) < CASE WHEN (_col3 is null) THEN (null) ELSE (_col2) END) (type: boolean)
-  Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+  predicate: ((_col1 + 100) < CASE WHEN (_col5 is null) THEN (null) ELSE (_col4) END) (type: boolean)
+  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
   Select Operator
-Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
   aggregations: count()
   mode: hash
@@ -1773,6 +1796,32 @@ STAGE PLANS:
 Execution mode: llap
 Reduce Operator Tree:
   Group By Operator
+keys: KEY._col0 (type: string)
+mode: mergepartial
+outputColumnNames: _col0
+Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: COMPLETE
+Group By Operator
+  aggregations: count()
+  keys: _col0 (type: string)
+  mode: complete
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE
+  Filter Operator
+predicate: (sq_count_check(_col1) <= 1) (type: boolean)
+Statistics: Num rows: 2 Data size: 258 Basic stats: COMPLETE Column stats: COMPLETE
+Select Operator
+  expressions: _col0 (type: string)
+  outputColumnNames: _col0
+  

[4/4] hive git commit: HIVE-16229 : Wrong result for correlated scalar subquery with aggregate (Vineet Garg via Ashutosh Chauhan)

2017-03-22 Thread hashutosh
HIVE-16229 : Wrong result for correlated scalar subquery with aggregate (Vineet Garg via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/112cbd19
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/112cbd19
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/112cbd19

Branch: refs/heads/master
Commit: 112cbd19c96bbe298aa371e82ac867caca189b15
Parents: 9c692a5
Author: Vineet Garg 
Authored: Wed Mar 22 22:54:25 2017 -0700
Committer: Ashutosh Chauhan 
Committed: Wed Mar 22 22:54:25 2017 -0700

--
 .../calcite/rules/HiveSubQueryRemoveRule.java   |   38 +-
 .../subquery_scalar_corr_multi_rows.q   |2 +
 .../subquery_scalar_corr_multi_rows.q.out   |5 +
 .../clientpositive/llap/subquery_scalar.q.out   | 1392 ++
 .../results/clientpositive/perf/query1.q.out|  248 ++--
 .../results/clientpositive/perf/query30.q.out   |  341 +++--
 .../results/clientpositive/perf/query6.q.out|  353 +++--
 .../results/clientpositive/perf/query81.q.out   |  341 +++--
 8 files changed, 1859 insertions(+), 861 deletions(-)
--
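For context, the query shape this patch targets is a correlated scalar subquery whose inner query aggregates. A minimal HiveQL sketch (the TPC-H-style part table and its columns are illustrative, not the exact regression test):

    -- A correlated scalar subquery with an aggregate; table/columns illustrative.
    SELECT p.p_name
    FROM part p
    WHERE p.p_size > (SELECT avg(pp.p_size)
                      FROM part pp
                      WHERE pp.p_type = p.p_type);

As the HiveSubQueryRemoveRule diff below shows, the FILTER (sq_count_check(count()) <= 1) guard that enforces the at-most-one-row contract previously ran only on the general (non-aggregate) path; the patch hoists it into the SCALAR_QUERY case so correlated aggregates get the same enforcement. That extra count/filter branch is what adds the new reducers and larger row estimates in the query1/query30/query6/query81 and subquery_scalar golden files above.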


http://git-wip-us.apache.org/repos/asf/hive/blob/112cbd19/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
index 76e0780..7c96f3d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
@@ -164,6 +164,25 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{
boolean isCorrScalarAgg) {
switch (e.getKind()) {
case SCALAR_QUERY:
+builder.push(e.rel);
+// returns single row/column
+builder.aggregate(builder.groupKey(),
+builder.count(false, "cnt"));
+
+SqlFunction countCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT,
+InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, SqlFunctionCategory.USER_DEFINED_FUNCTION);
+
+// we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer
+//  ends up getting rid of Project since it is not used further up the tree
+builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
+builder.call(countCheck, builder.field("cnt")),
+builder.literal(1)));
+if( !variablesSet.isEmpty())
+{
+builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
+}
+else
+builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
if(isCorrScalarAgg) {
// Transformation :
// Outer Query Left Join (inner query) on correlated predicate and preserve rows only from left side.
@@ -193,26 +212,7 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{

//Transformation is to left join for correlated predicates and inner join otherwise,
// but do a count on inner side before that to make sure it generates atmost 1 row.
-builder.push(e.rel);
-// returns single row/column
-builder.aggregate(builder.groupKey(),
-builder.count(false, "cnt"));
-
-SqlFunction countCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT,
-InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, SqlFunctionCategory.USER_DEFINED_FUNCTION);

-// we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer
-//  ends up getting rid of Project since it is not used further up the tree
-builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
-builder.call(countCheck, builder.field("cnt")),
-builder.literal(1)));
-
-if( !variablesSet.isEmpty())
-{
-builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
-}
-else
-builder.join(JoinRelType.INNER, builder.literal(true), 

[1/2] hive git commit: HIVE-15867: Add blobstore tests for import/export (Juan Rodríguez Hortalá, reviewed by Sergio Pena)

2017-03-22 Thread spena
Repository: hive
Updated Branches:
  refs/heads/master e434f8320 -> 9c692a5c4


http://git-wip-us.apache.org/repos/asf/hive/blob/9c692a5c/itests/hive-blobstore/src/test/results/clientpositive/import_blobstore_to_warehouse.q.out
--
diff --git a/itests/hive-blobstore/src/test/results/clientpositive/import_blobstore_to_warehouse.q.out b/itests/hive-blobstore/src/test/results/clientpositive/import_blobstore_to_warehouse.q.out
new file mode 100644
index 000..764c86d
--- /dev/null
+++ b/itests/hive-blobstore/src/test/results/clientpositive/import_blobstore_to_warehouse.q.out
@@ -0,0 +1,157 @@
+PREHOOK: query: DROP TABLE exim_employee
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE exim_employee
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE exim_employee (emp_id int COMMENT "employee id")
+COMMENT "employee table"
+PARTITIONED BY (emp_country string COMMENT "two char iso code")
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@exim_employee
+POSTHOOK: query: CREATE TABLE exim_employee (emp_id int COMMENT "employee id")
+COMMENT "employee table"
+PARTITIONED BY (emp_country string COMMENT "two char iso code")
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@exim_employee
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/test.dat"
+INTO TABLE exim_employee PARTITION (emp_country="in")
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@exim_employee
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/test.dat"
+INTO TABLE exim_employee PARTITION (emp_country="in")
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@exim_employee
+POSTHOOK: Output: default@exim_employee@emp_country=in
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/test.dat"
+INTO TABLE exim_employee PARTITION (emp_country="us")
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@exim_employee
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/test.dat"
+INTO TABLE exim_employee PARTITION (emp_country="us")
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@exim_employee
+POSTHOOK: Output: default@exim_employee@emp_country=us
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/test.dat"
+INTO TABLE exim_employee PARTITION (emp_country="cz")
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@exim_employee
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/test.dat"
+INTO TABLE exim_employee PARTITION (emp_country="cz")
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@exim_employee
+POSTHOOK: Output: default@exim_employee@emp_country=cz
+PREHOOK: query: DESCRIBE EXTENDED exim_employee
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@exim_employee
+POSTHOOK: query: DESCRIBE EXTENDED exim_employee
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@exim_employee
+emp_id int employee id 
+emp_countrystring  two char iso code   
+
+# Partition Information 
+# col_name data_type   comment 
+
+emp_countrystring  two char iso code   
+
+ A masked pattern was here 
+PREHOOK: query: SELECT * FROM exim_employee
+PREHOOK: type: QUERY
+PREHOOK: Input: default@exim_employee
+PREHOOK: Input: default@exim_employee@emp_country=cz
+PREHOOK: Input: default@exim_employee@emp_country=in
+PREHOOK: Input: default@exim_employee@emp_country=us
+ A masked pattern was here 
+POSTHOOK: query: SELECT * FROM exim_employee
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@exim_employee
+POSTHOOK: Input: default@exim_employee@emp_country=cz
+POSTHOOK: Input: default@exim_employee@emp_country=in
+POSTHOOK: Input: default@exim_employee@emp_country=us
+ A masked pattern was here 
+1  cz
+2  cz
+3  cz
+4  cz
+5  cz
+6  cz
+1  in
+2  in
+3  in
+4  in
+5  in
+6  in
+1  us
+2  us
+3  us
+4  us
+5  us
+6  us
+PREHOOK: query: EXPORT TABLE exim_employee PARTITION (emp_country='us')
+TO '### test.blobstore.path ###/import_blobstore_to_warehouse/export/exim_employee'
+PREHOOK: type: EXPORT
+PREHOOK: Input: default@exim_employee@emp_country=us
+PREHOOK: Output: ### test.blobstore.path ###/import_blobstore_to_warehouse/export/exim_employee
+POSTHOOK: query: EXPORT TABLE exim_employee PARTITION (emp_country='us')
+TO '### test.blobstore.path ###/import_blobstore_to_warehouse/export/exim_employee'
+POSTHOOK: type: EXPORT
+POSTHOOK: Input: default@exim_employee@emp_country=us
+POSTHOOK: Output: ### test.blobstore.path 

[2/2] hive git commit: HIVE-15867: Add blobstore tests for import/export (Juan Rodríguez Hortalá, reviewed by Sergio Pena)

2017-03-22 Thread spena
HIVE-15867: Add blobstore tests for import/export (Juan Rodríguez Hortalá, reviewed by Sergio Pena)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9c692a5c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9c692a5c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9c692a5c

Branch: refs/heads/master
Commit: 9c692a5c48f699d4a01a539093bc51cb4c03107d
Parents: e434f83
Author: Juan Rodríguez Hortalá 
Authored: Wed Mar 22 17:32:02 2017 -0500
Committer: Sergio Pena 
Committed: Wed Mar 22 17:32:02 2017 -0500

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   2 +-
 ...import_addpartition_blobstore_to_blobstore.q |  45 +++
 .../import_addpartition_blobstore_to_local.q|  44 +++
 ...import_addpartition_blobstore_to_warehouse.q |  41 +++
 .../import_addpartition_local_to_blobstore.q|  44 +++
 .../import_blobstore_to_blobstore.q |  30 ++
 .../import_blobstore_to_blobstore_nonpart.q |  25 ++
 .../clientpositive/import_blobstore_to_local.q  |  30 ++
 .../import_blobstore_to_warehouse.q |  28 ++
 .../import_blobstore_to_warehouse_nonpart.q |  23 ++
 .../clientpositive/import_local_to_blobstore.q  |  31 ++
 .../src/test/resources/hive-site.xml|   5 +
 ...rt_addpartition_blobstore_to_blobstore.q.out | 283 +++
 ...import_addpartition_blobstore_to_local.q.out | 283 +++
 ...rt_addpartition_blobstore_to_warehouse.q.out | 271 ++
 ...import_addpartition_local_to_blobstore.q.out | 277 ++
 .../import_blobstore_to_blobstore.q.out | 161 +++
 .../import_blobstore_to_blobstore_nonpart.q.out | 103 +++
 .../import_blobstore_to_local.q.out | 161 +++
 .../import_blobstore_to_warehouse.q.out | 157 ++
 .../import_blobstore_to_warehouse_nonpart.q.out |  99 +++
 .../import_local_to_blobstore.q.out | 159 +++
 22 files changed, 2301 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/9c692a5c/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 1b186f7..d4a0b2e 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2133,7 +2133,7 @@ public class HiveConf extends Configuration {
 "When true the HDFS location stored in the index file will be ignored at runtime.\n" +
 "If the data got moved or the name of the cluster got changed, the index data should still be usable."),

-HIVE_EXIM_URI_SCHEME_WL("hive.exim.uri.scheme.whitelist", "hdfs,pfile,file",
+HIVE_EXIM_URI_SCHEME_WL("hive.exim.uri.scheme.whitelist", "hdfs,pfile,file,s3,s3a",
 "A comma separated list of acceptable URI schemes for import and export."),
 // temporary variable for testing. This is added just to turn off this feature in case of a bug in
 // deployment. It has not been documented in hive-default.xml intentionally, this should be removed
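With s3 and s3a added to the scheme whitelist above, exports and imports can target a blobstore directly. A minimal HiveQL sketch (the bucket name and paths are illustrative; the new tests below substitute ${hiveconf:test.blobstore.path.unique} instead):

    EXPORT TABLE exim_employee PARTITION (emp_country='us')
    TO 's3a://my-bucket/export/exim_employee';

    IMPORT TABLE exim_employee_imported
    FROM 's3a://my-bucket/export/exim_employee';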

http://git-wip-us.apache.org/repos/asf/hive/blob/9c692a5c/itests/hive-blobstore/src/test/queries/clientpositive/import_addpartition_blobstore_to_blobstore.q
--
diff --git a/itests/hive-blobstore/src/test/queries/clientpositive/import_addpartition_blobstore_to_blobstore.q b/itests/hive-blobstore/src/test/queries/clientpositive/import_addpartition_blobstore_to_blobstore.q
new file mode 100644
index 000..8fee8ed
--- /dev/null
+++ b/itests/hive-blobstore/src/test/queries/clientpositive/import_addpartition_blobstore_to_blobstore.q
@@ -0,0 +1,45 @@
+-- Check we can create a partitioned table in the warehouse, 
+-- export it to a blobstore, and then import its different partitions
+-- using the blobstore as target location
+DROP TABLE exim_employee;
+CREATE TABLE exim_employee (emp_id int COMMENT "employee id")
+COMMENT "employee table"
+PARTITIONED BY (emp_country string COMMENT "two char iso code")
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "../../data/files/test.dat"
+INTO TABLE exim_employee PARTITION (emp_country="in");
+LOAD DATA LOCAL INPATH "../../data/files/test.dat"
+INTO TABLE exim_employee PARTITION (emp_country="us");
+LOAD DATA LOCAL INPATH "../../data/files/test.dat"
+INTO TABLE exim_employee PARTITION (emp_country="cz");
+
+DESCRIBE EXTENDED exim_employee;
+SELECT * FROM exim_employee;
+
+dfs -rm -r -f ${hiveconf:test.blobstore.path.unique}/import_addpartition_blobstore_to_blobstore/export/exim_employee;
+EXPORT TABLE 

hive git commit: HIVE-15691 Create StrictRegexWriter to work with RegexSerializer for Flume Hive Sink (Kalyan via Eugene Koifman)

2017-03-22 Thread ekoifman
Repository: hive
Updated Branches:
  refs/heads/master 8613ef200 -> ea3be9549


HIVE-15691 Create StrictRegexWriter to work with RegexSerializer for Flume Hive Sink (Kalyan via Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ea3be954
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ea3be954
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ea3be954

Branch: refs/heads/master
Commit: ea3be9549dca7eaed5e838bbcb69d2372817ce42
Parents: 8613ef2
Author: Eugene Koifman 
Authored: Wed Mar 22 13:22:08 2017 -0700
Committer: Eugene Koifman 
Committed: Wed Mar 22 13:22:08 2017 -0700

--
 .../hcatalog/streaming/StrictRegexWriter.java   | 188 +++
 .../hive/hcatalog/streaming/TestStreaming.java  |  81 +++-
 2 files changed, 263 insertions(+), 6 deletions(-)
--
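StrictRegexWriter parses each incoming text record with a caller-supplied regex (backed by org.apache.hadoop.hive.serde2.RegexSerDe, per the class Javadoc below) and streams the resulting rows into a Hive transactional table. A minimal sketch of a compatible target table, assuming the usual requirements of the hcatalog streaming API (bucketed, ORC, transactional); the table and column names are illustrative, and the parsing regex is passed to the writer, not declared on the table:

    -- Illustrative streaming target for StrictRegexWriter.
    CREATE TABLE web_logs (host STRING, ts STRING, request STRING)
    CLUSTERED BY (host) INTO 4 BUCKETS
    STORED AS ORC
    TBLPROPERTIES ('transactional' = 'true');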


http://git-wip-us.apache.org/repos/asf/hive/blob/ea3be954/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java
--
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java
new file mode 100644
index 000..78987ab
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java
@@ -0,0 +1,188 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.hcatalog.streaming;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
+import org.apache.hadoop.hive.serde2.RegexSerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Streaming Writer handles text input data with regex. Uses
+ * org.apache.hadoop.hive.serde2.RegexSerDe
+ */
+public class StrictRegexWriter extends AbstractRecordWriter {
+  private RegexSerDe serde;
+  private final StructObjectInspector recordObjInspector;
+  private final ObjectInspector[] bucketObjInspectors;
+  private final StructField[] bucketStructFields;
+  
+  /**
+   * @param endPoint the end point to write to
+   * @param conn connection this Writer is to be used with
+   * @throws ConnectionError
+   * @throws SerializationError
+   * @throws StreamingException
+   */
+  public StrictRegexWriter(HiveEndPoint endPoint, StreamingConnection conn)
+  throws ConnectionError, SerializationError, StreamingException {
+this(null, endPoint, null, conn);
+  }
+  
+  /**
+   * @param endPoint the end point to write to
+   * @param conf a Hive conf object. Should be null if not using advanced Hive settings.
+   * @param conn connection this Writer is to be used with
+   * @throws ConnectionError
+   * @throws SerializationError
+   * @throws StreamingException
+   */
+  public StrictRegexWriter(HiveEndPoint endPoint, HiveConf conf, StreamingConnection conn)
+  throws ConnectionError, SerializationError, StreamingException {
+this(null, endPoint, conf, conn);
+  }
+  
+  /**
+   * @param regex to parse the data
+   * @param endPoint the end point to write to
+   * @param conf a Hive conf object. Should be null if not using advanced Hive settings.
+   * @param conn connection this Writer is to be used with
+   * @throws ConnectionError
+   * 

hive git commit: HIVE-16107: JDBC: HttpClient should retry one more time on NoHttpResponseException (Vaibhav Gumashta reviewed by Daniel Dai, Thejas Nair)

2017-03-22 Thread vgumashta
Repository: hive
Updated Branches:
  refs/heads/master ce695b5d4 -> 8613ef200


HIVE-16107: JDBC: HttpClient should retry one more time on NoHttpResponseException (Vaibhav Gumashta reviewed by Daniel Dai, Thejas Nair)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8613ef20
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8613ef20
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8613ef20

Branch: refs/heads/master
Commit: 8613ef200fb1e1372f41a225bd358f06e754f906
Parents: ce695b5
Author: Vaibhav Gumashta 
Authored: Wed Mar 22 11:02:23 2017 -0700
Committer: Vaibhav Gumashta 
Committed: Wed Mar 22 11:02:23 2017 -0700

--
 .../apache/hive/jdbc/TestJdbcWithMiniHS2.java   | 32 
 .../org/apache/hive/jdbc/HiveConnection.java| 24 +--
 2 files changed, 53 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/8613ef20/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
--
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
index afe23f8..3780b4e 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
@@ -966,6 +966,38 @@ public class TestJdbcWithMiniHS2 {
   }
 
   /**
+   * Test for jdbc driver retry on NoHttpResponseException
+   * @throws Exception
+   */
+  @Test
+  public void testHttpRetryOnServerIdleTimeout() throws Exception {
+// Stop HiveServer2
+stopMiniHS2();
+HiveConf conf = new HiveConf();
+conf.set("hive.server2.transport.mode", "http");
+// Set server's idle timeout to a very low value
+conf.set("hive.server2.thrift.http.max.idle.time", "5");
+startMiniHS2(conf);
+String userName = System.getProperty("user.name");
+Connection conn = getConnection(miniHS2.getJdbcURL(testDbName), userName, "password");
+Statement stmt = conn.createStatement();
+stmt.execute("select from_unixtime(unix_timestamp())");
+// Sleep for longer than server's idletimeout and execute a query
+TimeUnit.SECONDS.sleep(10);
+try {
+  stmt.execute("select from_unixtime(unix_timestamp())");
+} catch (Exception e) {
+  fail("Not expecting exception: " + e);
+} finally {
+  if (conn != null) {
+conn.close();
+  }
+}
+// Restore original state
+restoreMiniHS2AndConnections();
+  }
+
+  /**
   * Tests that DataNucleus' NucleusContext.classLoaderResolverMap clears cached class objects
* (& hence doesn't leak classloaders) on closing any session
*

http://git-wip-us.apache.org/repos/asf/hive/blob/8613ef20/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java
--
diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java b/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java
index 1695c5d..fb18adb 100644
--- a/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java
+++ b/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java
@@ -41,6 +41,7 @@ import org.apache.hive.service.rpc.thrift.TSessionHandle;
 import org.apache.http.HttpRequestInterceptor;
 import org.apache.http.HttpResponse;
 import org.apache.http.client.CookieStore;
+import org.apache.http.client.HttpRequestRetryHandler;
 import org.apache.http.client.ServiceUnavailableRetryStrategy;
 import org.apache.http.config.Registry;
 import org.apache.http.config.RegistryBuilder;
@@ -386,9 +387,9 @@ public class HiveConnection implements java.sql.Connection {
* Add an interceptor to pass username/password in the header.
* In https mode, the entire information is encrypted
*/
-  requestInterceptor = new HttpBasicAuthInterceptor(getUserName(), getPassword(),
-cookieStore, cookieName, useSsl,
-additionalHttpHeaders);
+requestInterceptor =
+new HttpBasicAuthInterceptor(getUserName(), getPassword(), cookieStore, cookieName,
+useSsl, additionalHttpHeaders);
   }
 }
 // Configure http client for cookie based authentication
@@ -421,6 +422,23 @@ public class HiveConnection implements java.sql.Connection {
 } else {
   httpClientBuilder = HttpClientBuilder.create();
 }
+// In case the server's idletimeout is set to a lower value, it might close it's side of
+// connection. However we retry one more time on NoHttpResponseException
+

[3/4] hive git commit: HIVE-15784: Vectorization: Turn on text vectorization by default (vector serde) (Matt McCline, reviewed by Sergey Shelukhin)

2017-03-22 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ce695b5d/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
index 0182a46..c534cb5 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
@@ -76,12 +76,16 @@ STAGE PLANS:
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
-Execution mode: llap
+Execution mode: vectorized, llap
 LLAP IO: no inputs
 Map Vectorization:
-enabled: false
-enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+enabled: true
+enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+groupByVectorOutput: true
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -249,12 +253,16 @@ STAGE PLANS:
   sort order: +
   Map-reduce partition columns: _col0 (type: string)
   Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
-Execution mode: llap
+Execution mode: vectorized, llap
 LLAP IO: no inputs
 Map Vectorization:
-enabled: false
-enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+enabled: true
+enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+groupByVectorOutput: true
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 4 
 Map Operator Tree:
 TableScan
@@ -401,12 +409,16 @@ STAGE PLANS:
   sort order: +
   Map-reduce partition columns: _col0 (type: string)
   Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
-Execution mode: llap
+Execution mode: vectorized, llap
 LLAP IO: no inputs
 Map Vectorization:
-enabled: false
-enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+enabled: true
+enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+groupByVectorOutput: true
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 4 
 Map Operator Tree:
 TableScan
@@ -554,12 +566,16 @@ STAGE PLANS:
   Map-reduce partition columns: _col0 (type: string)
   Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE
   value expressions: _col1 (type: string)
-Execution mode: llap
+Execution mode: vectorized, llap
 LLAP IO: no inputs
 Map Vectorization:
-enabled: false
-enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+enabled: true
+enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+groupByVectorOutput: true
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 5 
 Map Operator Tree:
 TableScan
@@ -773,12 +789,16 @@ STAGE PLANS:
   Map-reduce partition columns: _col0 (type: string)
   Statistics: Num rows: 2000 Data size: 736000 Basic stats: COMPLETE Column stats: COMPLETE
   value expressions: _col1 (type: string)
-Execution mode: llap
+Execution mode: vectorized, llap
 LLAP IO: no inputs
 Map Vectorization:
-enabled: false
-

[2/4] hive git commit: HIVE-15784: Vectorization: Turn on text vectorization by default (vector serde) (Matt McCline, reviewed by Sergey Shelukhin)

2017-03-22 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ce695b5d/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
index 7687cff..dc80037 100644
--- a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
@@ -29,21 +29,40 @@ STAGE PLANS:
 TableScan
   alias: li
   Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 3, val 1) -> boolean, SelectColumnIsNotNull(col 1) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean
 predicate: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean)
 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
 Select Operator
   expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
   outputColumnNames: _col0, _col1, _col2
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0, 1, 2]
   Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
   Spark HashTable Sink Operator
+Spark Hash Table Sink Vectorization:
+className: VectorSparkHashTableSinkOperator
+native: true
 keys:
   0 _col0 (type: int)
   1 _col1 (type: int)
+Execution mode: vectorized
 Map Vectorization:
-enabled: false
-enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+enabled: true
+enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+groupByVectorOutput: true
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
 Local Work:
   Map Reduce Local Work
 Map 4 
@@ -51,30 +70,51 @@ STAGE PLANS:
 TableScan
   alias: lineitem
   Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 14, val AIR) -> boolean, SelectColumnIsNotNull(col 0) -> boolean) -> boolean
 predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean)
 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
 Select Operator
   expressions: l_orderkey (type: int)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0]
   Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
   Group By Operator
 Group By Vectorization:
-vectorOutput: false
+className: VectorGroupByOperator
+vectorOutput: true
+keyExpressions: col 0
 native: false
-projectedOutputColumns: null
+projectedOutputColumns: []
 keys: _col0 (type: int)
 mode: hash

[4/4] hive git commit: HIVE-15784: Vectorization: Turn on text vectorization by default (vector serde) (Matt McCline, reviewed by Sergey Shelukhin)

2017-03-22 Thread mmccline
HIVE-15784: Vectorization: Turn on text vectorization by default (vector serde) (Matt McCline, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ce695b5d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ce695b5d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ce695b5d

Branch: refs/heads/master
Commit: ce695b5d4ae07d0bfc79fb88fcb09cb99e9e4706
Parents: 9f5a3e3
Author: Matt McCline 
Authored: Wed Mar 22 03:06:34 2017 -0500
Committer: Matt McCline 
Committed: Wed Mar 22 03:06:34 2017 -0500

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   4 +-
 .../hive/ql/optimizer/physical/Vectorizer.java  |   1 +
 .../clientpositive/vector_groupby_mapjoin.q |  14 +
 .../clientpositive/vectorized_parquet_types.q   |   5 +-
 .../llap/dynpart_sort_opt_vectorization.q.out   |   4 +-
 .../llap/dynpart_sort_optimization2.q.out   |   4 +-
 .../results/clientpositive/llap/mergejoin.q.out |   8 +-
 .../clientpositive/llap/tez_join_hash.q.out |  10 +-
 .../llap/tez_vector_dynpart_hashjoin_2.q.out|   8 +-
 .../clientpositive/llap/vector_bucket.q.out |  21 +-
 .../llap/vector_decimal_round.q.out |  44 +-
 .../llap/vector_groupby_mapjoin.q.out   | 133 +-
 .../llap/vector_mapjoin_reduce.q.out| 167 ++-
 .../llap/vector_udf_character_length.q.out  |   2 +-
 .../llap/vector_udf_octet_length.q.out  |   2 +-
 .../llap/vectorized_bucketmapjoin1.q.out|  21 +-
 .../vectorized_dynamic_partition_pruning.q.out  | 470 +--
 .../clientpositive/llap/vectorized_join46.q.out |  58 +--
 .../llap/vectorized_parquet_types.q.out | 189 ++--
 .../test/results/clientpositive/mergejoin.q.out |  11 +
 .../spark/vector_mapjoin_reduce.q.out   | 157 ++-
 .../test/results/clientpositive/structin.q.out  |   1 +
 .../clientpositive/tez/explainuser_3.q.out  |  20 +-
 .../results/clientpositive/tez_join_hash.q.out  |   5 +
 .../results/clientpositive/vector_bucket.q.out  |  21 +-
 .../clientpositive/vector_cast_constant.q.out   |  17 +-
 .../results/clientpositive/vector_char_2.q.out  |  34 +-
 .../clientpositive/vector_decimal_round.q.out   |  43 +-
 .../clientpositive/vector_groupby4.q.out|  17 +-
 .../clientpositive/vector_groupby6.q.out|  17 +-
 .../clientpositive/vector_groupby_mapjoin.q.out | 165 ++-
 .../clientpositive/vector_groupby_reduce.q.out  |  51 +-
 .../clientpositive/vector_mapjoin_reduce.q.out  | 112 -
 .../vector_mr_diff_schema_alias.q.out   |  18 +-
 .../clientpositive/vector_orderby_5.q.out   |  17 +-
 .../vector_reduce_groupby_decimal.q.out |  17 +-
 .../clientpositive/vector_string_concat.q.out   |  17 +-
 .../vector_tablesample_rows.q.out   |  17 +-
 .../vector_udf_character_length.q.out   |   1 +
 .../vector_udf_octet_length.q.out   |   1 +
 .../clientpositive/vectorization_13.q.out   |  34 +-
 .../clientpositive/vectorization_14.q.out   |   9 +-
 .../clientpositive/vectorization_15.q.out   |   9 +-
 .../clientpositive/vectorization_limit.q.out|  17 +-
 .../clientpositive/vectorized_date_funcs.q.out  |  17 +-
 .../vectorized_parquet_types.q.out  |  17 +-
 .../clientpositive/vectorized_shufflejoin.q.out |  23 +-
 47 files changed, 1658 insertions(+), 392 deletions(-)
--
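The user-visible change is the hive.vectorized.use.vector.serde.deserialize default flipping from false to true (HiveConf hunk below), which is why text-format plans across these golden files move from "Execution mode: llap" to "vectorized, llap". A minimal sketch of inspecting or overriding the new default in a session (src is the usual test table; any text-backed table works):

    -- Opt back out of text vectorization for one session:
    SET hive.vectorized.use.vector.serde.deserialize=false;

    -- The "Map Vectorization" blocks in the diffs above come from
    -- vectorization-annotated explain output:
    EXPLAIN VECTORIZATION DETAIL
    SELECT key, count(*) FROM src GROUP BY key;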


http://git-wip-us.apache.org/repos/asf/hive/blob/ce695b5d/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 62908f9..1b186f7 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2751,9 +2751,9 @@ public class HiveConf extends Configuration {
 HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT("hive.vectorized.use.vectorized.input.format", true,
 "This flag should be set to true to enable vectorizing with vectorized input file format capable SerDe.\n" +
 "The default value is true."),
-HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE("hive.vectorized.use.vector.serde.deserialize", false,
+HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE("hive.vectorized.use.vector.serde.deserialize", true,
 "This flag should be set to true to enable vectorizing rows using vector deserialize.\n" +
-"The default value is false."),
+"The default value is true."),
 HIVE_VECTORIZATION_USE_ROW_DESERIALIZE("hive.vectorized.use.row.serde.deserialize", false,
 "This flag should be set to true to enable vectorizing using 
 "This flag should be set to true to enable vectorizing using 

[1/4] hive git commit: HIVE-15784: Vectorization: Turn on text vectorization by default (vector serde) (Matt McCline, reviewed by Sergey Shelukhin)

2017-03-22 Thread mmccline
Repository: hive
Updated Branches:
  refs/heads/master 9f5a3e3d8 -> ce695b5d4


http://git-wip-us.apache.org/repos/asf/hive/blob/ce695b5d/ql/src/test/results/clientpositive/vectorization_14.q.out
--
diff --git a/ql/src/test/results/clientpositive/vectorization_14.q.out b/ql/src/test/results/clientpositive/vectorization_14.q.out
index 775c3ef..ec4f7cd 100644
--- a/ql/src/test/results/clientpositive/vectorization_14.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_14.q.out
@@ -139,10 +139,15 @@ STAGE PLANS:
   sort order: 
   Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
   value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double)
+  Execution mode: vectorized
   Map Vectorization:
-  enabled: false
-  enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+  enabled: true
+  enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+  groupByVectorOutput: true
   inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+  allNative: false
+  usesVectorUDFAdaptor: false
+  vectorized: true
   Reduce Vectorization:
   enabled: false
   enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true

http://git-wip-us.apache.org/repos/asf/hive/blob/ce695b5d/ql/src/test/results/clientpositive/vectorization_15.q.out
--
diff --git a/ql/src/test/results/clientpositive/vectorization_15.q.out b/ql/src/test/results/clientpositive/vectorization_15.q.out
index 35667db..12d8141 100644
--- a/ql/src/test/results/clientpositive/vectorization_15.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_15.q.out
@@ -135,10 +135,15 @@ STAGE PLANS:
   sort order: +++
   Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
   value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double)
+  Execution mode: vectorized
   Map Vectorization:
-  enabled: false
-  enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+  enabled: true
+  enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+  groupByVectorOutput: true
   inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+  allNative: false
+  usesVectorUDFAdaptor: false
+  vectorized: true
   Reduce Vectorization:
   enabled: false
   enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true

http://git-wip-us.apache.org/repos/asf/hive/blob/ce695b5d/ql/src/test/results/clientpositive/vectorization_limit.q.out
--
diff --git a/ql/src/test/results/clientpositive/vectorization_limit.q.out b/ql/src/test/results/clientpositive/vectorization_limit.q.out
index 3ea3564..a9db0d0 100644
--- a/ql/src/test/results/clientpositive/vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_limit.q.out
@@ -677,15 +677,28 @@ STAGE PLANS:
 Map Reduce
   Map Operator Tree:
   TableScan
+TableScan Vectorization:
+native: true
+projectedOutputColumns: [0, 1]
 Reduce Output Operator
   key expressions: _col1 (type: bigint), _col0 (type: double)
   sort order: ++
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false
   Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
   TopN Hash Memory Usage: 0.3
+  Execution mode: vectorized
   Map Vectorization:
-  enabled: false
-