svn commit: r1838505 - /hive/cms/trunk/content/people.mdtext

2018-08-20 Thread xuefu
Author: xuefu
Date: Mon Aug 20 22:09:32 2018
New Revision: 1838505

URL: http://svn.apache.org/viewvc?rev=1838505&view=rev
Log:
Update Xuefu's org in the committer list

Modified:
hive/cms/trunk/content/people.mdtext

Modified: hive/cms/trunk/content/people.mdtext
URL: http://svn.apache.org/viewvc/hive/cms/trunk/content/people.mdtext?rev=1838505&r1=1838504&r2=1838505&view=diff
==
--- hive/cms/trunk/content/people.mdtext (original)
+++ hive/cms/trunk/content/people.mdtext Mon Aug 20 22:09:32 2018
@@ -334,7 +334,7 @@ tr:nth-child(2n+1) {
 
 xuefu 
 Xuefu Zhang 
- 
+<a href="https://www.alibaba.com/">Alibaba Inc</a> 
 
 
 




hive git commit: HIVE-17257: Hive should merge empty files (Chao via Xuefu)

2018-01-18 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master ad1243bef -> 9816cfb44


HIVE-17257: Hive should merge empty files (Chao via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9816cfb4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9816cfb4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9816cfb4

Branch: refs/heads/master
Commit: 9816cfb44ad91a8c2a030e540a703983862e4123
Parents: ad1243b
Author: Xuefu Zhang <xu...@uber.com>
Authored: Thu Jan 18 11:26:54 2018 -0800
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Thu Jan 18 11:26:54 2018 -0800

--
 .../ql/plan/ConditionalResolverMergeFiles.java  |  2 +-
 .../test/queries/clientpositive/merge_empty.q   | 14 ++
 .../results/clientpositive/merge_empty.q.out| 45 
 3 files changed, 60 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/9816cfb4/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
index 129347b..ebf2298 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
@@ -408,7 +408,7 @@ public class ConditionalResolverMergeFiles implements 
ConditionalResolver,
*/
   private long getMergeSize(FileSystem inpFs, Path dirPath, long avgSize) {
 AverageSize averageSize = getAverageSize(inpFs, dirPath);
-if (averageSize.getTotalSize() <= 0) {
+if (averageSize.getTotalSize() < 0) {
   return -1;
 }
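
The one-character relaxation above is the whole fix: getAverageSize() reports a
negative total only when it fails to read the directory, while a total of
exactly zero means the query wrote nothing but empty files. Under the old
"<= 0" guard, empty output skipped the merge branch and left one empty file per
reducer; under "< 0", only the error case opts out, so the empty files collapse
into a single one, which is what the new merge_empty.q test checks with its
1000-reducer join against an empty table. A minimal sketch of the revised
guard, assuming the negative-total-means-error convention (this is not Hive's
actual class):

    // Illustrative sketch: a negative total signals a read error and skips
    // the merge; zero (all-empty output) now falls through and gets merged.
    class MergeGuard {
      static long mergeSize(long totalSize) {
        if (totalSize < 0) {   // was: totalSize <= 0 before HIVE-17257
          return -1;           // error reading file sizes: skip the merge task
        }
        return totalSize;      // 0 proceeds, so empty files merge into one
      }
    }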
 

http://git-wip-us.apache.org/repos/asf/hive/blob/9816cfb4/ql/src/test/queries/clientpositive/merge_empty.q
--
diff --git a/ql/src/test/queries/clientpositive/merge_empty.q 
b/ql/src/test/queries/clientpositive/merge_empty.q
new file mode 100644
index 000..188b39e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/merge_empty.q
@@ -0,0 +1,14 @@
+set hive.merge.mapredfiles=true;
+set hive.merge.sparkfiles=true;
+set hive.auto.convert.join=false;
+set mapreduce.job.reduces=1000;
+
+create table dummy (a string);
+insert overwrite directory '/tmp/test' select src.key from src join dummy on 
src.key = dummy.a;
+dfs -ls /tmp/test;
+
+-- verify that this doesn't merge for bucketed tables
+create table foo (a bigint, b string) clustered by (a) into 256 buckets;
+create table bar (a bigint, b string);
+insert overwrite table foo select * from bar;
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/foo;

http://git-wip-us.apache.org/repos/asf/hive/blob/9816cfb4/ql/src/test/results/clientpositive/merge_empty.q.out
--
diff --git a/ql/src/test/results/clientpositive/merge_empty.q.out 
b/ql/src/test/results/clientpositive/merge_empty.q.out
new file mode 100644
index 000..c13cbf4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/merge_empty.q.out
@@ -0,0 +1,45 @@
+PREHOOK: query: create table dummy (a string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dummy
+POSTHOOK: query: create table dummy (a string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dummy
+ A masked pattern was here 
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dummy
+PREHOOK: Input: default@src
+ A masked pattern was here 
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dummy
+POSTHOOK: Input: default@src
+ A masked pattern was here 
+PREHOOK: query: create table foo (a bigint, b string) clustered by (a) into 
256 buckets
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo
+POSTHOOK: query: create table foo (a bigint, b string) clustered by (a) into 
256 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo
+PREHOOK: query: create table bar (a bigint, b string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@bar
+POSTHOOK: query: create table bar (a bigint, b string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@bar
+PREHOOK: query: insert overwrite table foo select * from bar
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bar
+PREHOOK: Output: default@foo
+POSTHOOK: query: insert overwrite table foo select * from bar
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bar
+POSTHOOK: Output: default@foo
+POSTHOOK: Lineage: foo.a SIMPLE [(bar)bar.FieldSchem

[4/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)

2017-10-13 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_null_agg.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/subquery_null_agg.q.out 
b/ql/src/test/results/clientpositive/spark/subquery_null_agg.q.out
new file mode 100644
index 000..144c3ec
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/subquery_null_agg.q.out
@@ -0,0 +1,182 @@
+PREHOOK: query: CREATE TABLE table_7 (int_col INT)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_7
+POSTHOOK: query: CREATE TABLE table_7 (int_col INT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table_7
+Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 
2' is a cross product
+Warning: Shuffle Join JOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 
'Reducer 3' is a cross product
+PREHOOK: query: explain
+SELECT
+(t1.int_col) * (t1.int_col) AS int_col
+FROM (
+SELECT
+MIN(NULL) OVER () AS int_col
+FROM table_7
+) t1
+WHERE
+(False) NOT IN (SELECT
+False AS boolean_col
+FROM (
+SELECT
+MIN(NULL) OVER () AS int_col
+FROM table_7
+) tt1
+WHERE
+(t1.int_col) = (tt1.int_col))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT
+(t1.int_col) * (t1.int_col) AS int_col
+FROM (
+SELECT
+MIN(NULL) OVER () AS int_col
+FROM table_7
+) t1
+WHERE
+(False) NOT IN (SELECT
+False AS boolean_col
+FROM (
+SELECT
+MIN(NULL) OVER () AS int_col
+FROM table_7
+) tt1
+WHERE
+(t1.int_col) = (tt1.int_col))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Spark
+  Edges:
+Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 
(PARTITION-LEVEL SORT, 1)
+Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 7 
(PARTITION-LEVEL SORT, 1)
+Reducer 5 <- Map 4 (GROUP, 1)
+Reducer 7 <- Map 6 (GROUP, 2)
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: table_7
+  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
+  Select Operator
+Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
+Reduce Output Operator
+  sort order: 
+  Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: COMPLETE
+Map 4 
+Map Operator Tree:
+TableScan
+  alias: table_7
+  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
+  Select Operator
+Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
+Filter Operator
+  predicate: false (type: boolean)
+  Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: COMPLETE
+  Group By Operator
+aggregations: count()
+mode: hash
+outputColumnNames: _col0
+Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+Reduce Output Operator
+  sort order: 
+  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+  value expressions: _col0 (type: bigint)
+Map 6 
+Map Operator Tree:
+TableScan
+  alias: table_7
+  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
+  Select Operator
+Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
+Filter Operator
+  predicate: false (type: boolean)
+  Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: COMPLETE
+  Group By Operator
+keys: false (type: boolean)
+mode: hash
+outputColumnNames: _col0
+Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: COMPLETE
+Reduce Output Operator
+  key expressions: _col0 (type: boolean)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: boolean)
+  Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: COMPLETE
+Reducer 2 
+Reduce Operator Tree:
+  Join Operator
+   

[6/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)

2017-10-13 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_nested_subquery.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/subquery_nested_subquery.q.out 
b/ql/src/test/results/clientpositive/spark/subquery_nested_subquery.q.out
new file mode 100644
index 000..15f33f0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/subquery_nested_subquery.q.out
@@ -0,0 +1,38 @@
+PREHOOK: query: select *
+from part x 
+where x.p_name in (select y.p_name from part y where exists (select z.p_name 
from part z where y.p_name = z.p_name))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+ A masked pattern was here 
+POSTHOOK: query: select *
+from part x 
+where x.p_name in (select y.p_name from part y where exists (select z.p_name 
from part z where y.p_name = z.p_name))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+ A masked pattern was here 
+192697 almond antique blue firebrick mint  Manufacturer#5  Brand#52
MEDIUM BURNISHED TIN31  LG DRUM 1789.69 ickly ir
+90681  almond antique chartreuse khaki white   Manufacturer#3  Brand#31
MEDIUM BURNISHED TIN17  SM CASE 1671.68 are slyly after the sl
+85768  almond antique chartreuse lavender yellow   Manufacturer#1  
Brand#12LARGE BRUSHED STEEL 34  SM BAG  1753.76 refull
+42669  almond antique medium spring khaki  Manufacturer#5  Brand#51
STANDARD BURNISHED TIN  6   MED CAN 1611.66 sits haggl
+105685 almond antique violet chocolate turquoise   Manufacturer#2  
Brand#22MEDIUM ANODIZED COPPER  14  MED CAN 1690.68 ly pending requ
+48427  almond antique violet mint lemonManufacturer#4  Brand#42
PROMO POLISHED STEEL39  SM CASE 1375.42 hely ironic i
+86428  almond aquamarine burnished black steel Manufacturer#1  Brand#12
STANDARD ANODIZED STEEL 28  WRAP BAG1414.42 arefully 
+15103  almond aquamarine dodger light gainsboroManufacturer#5  
Brand#53ECONOMY BURNISHED STEEL 46  LG PACK 1018.1  packages hinder 
carefu
+45261  almond aquamarine floral ivory bisque   Manufacturer#4  Brand#42
SMALL PLATED STEEL  27  WRAP CASE   1206.26 careful
+65667  almond aquamarine pink moccasin thistle Manufacturer#1  Brand#12
LARGE BURNISHED STEEL   42  JUMBO CASE  1632.66 e across the expr
+132666 almond aquamarine rose maroon antique   Manufacturer#2  Brand#24
SMALL POLISHED NICKEL   25  MED BOX 1698.66 even 
+195606 almond aquamarine sandy cyan gainsboro  Manufacturer#2  Brand#25
STANDARD PLATED TIN 18  SM PKG  1701.6  ic de
+17927  almond aquamarine yellow dodger mintManufacturer#4  Brand#41
ECONOMY BRUSHED COPPER  7   SM PKG  1844.92 ites. eve
+33357  almond azure aquamarine papaya violet   Manufacturer#4  Brand#41
STANDARD ANODIZED TIN   12  WRAP CASE   1290.35 reful
+78486  almond azure blanched chiffon midnight  Manufacturer#5  Brand#52
LARGE BRUSHED BRASS 23  MED BAG 1464.48 hely blith
+121152 almond antique burnished rose metallic  Manufacturer#1  Brand#14
PROMO PLATED TIN2   JUMBO BOX   1173.15 e pinto beans h
+121152 almond antique burnished rose metallic  Manufacturer#1  Brand#14
PROMO PLATED TIN2   JUMBO BOX   1173.15 e pinto beans h
+17273  almond antique forest lavender goldenrodManufacturer#3  
Brand#35PROMO ANODIZED TIN  14  JUMBO CASE  1190.27 along 
the
+49671  almond antique gainsboro frosted violet Manufacturer#4  Brand#41
SMALL BRUSHED BRASS 10  SM BOX  1620.67 ccounts run quick
+112398 almond antique metallic orange dim  Manufacturer#3  Brand#32
MEDIUM BURNISHED BRASS  19  JUMBO JAR   1410.39 ole car
+40982  almond antique misty red olive  Manufacturer#3  Brand#32ECONOMY 
PLATED COPPER   1   LG PKG  1922.98 c foxes can s
+144293 almond antique olive coral navajo   Manufacturer#3  Brand#34
STANDARD POLISHED STEEL 45  JUMBO CAN   1337.29 ag furiously about 
+110592 almond antique salmon chartreuse burlywood  Manufacturer#1  
Brand#15PROMO BURNISHED NICKEL  6   JUMBO PKG   1602.59  to the 
furiously
+155733 almond antique sky peru orange  Manufacturer#5  Brand#53SMALL 
PLATED BRASS  2   WRAP DRUM   1788.73 furiously. bra
+191709 almond antique violet turquoise frosted Manufacturer#2  Brand#22
ECONOMY POLISHED STEEL  40  MED BOX 1800.7   haggle
+146985 almond aquamarine midnight light salmon Manufacturer#2  Brand#23
MEDIUM BURNISHED COPPER 2   SM CASE 2031.98 s cajole caref



[8/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)

2017-10-13 Thread xuefu
HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8eaf18d5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8eaf18d5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8eaf18d5

Branch: refs/heads/master
Commit: 8eaf18d599909751efc4bb1e05d31e65da8a8d1e
Parents: 1253450
Author: Xuefu Zhang <xu...@uber.com>
Authored: Fri Oct 13 10:03:35 2017 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Fri Oct 13 10:03:35 2017 -0700

--
 .../test/resources/testconfiguration.properties |8 +
 .../clientpositive/spark/subquery_multi.q.out   | 4129 ++
 .../spark/subquery_nested_subquery.q.out|   38 +
 .../clientpositive/spark/subquery_notin.q.out   | 7722 ++
 .../spark/subquery_null_agg.q.out   |  182 +
 .../clientpositive/spark/subquery_scalar.q.out  | 6619 +++
 .../clientpositive/spark/subquery_select.q.out  | 5379 
 .../spark/subquery_shared_alias.q.out   |   23 +
 .../clientpositive/spark/subquery_views.q.out   |  598 ++
 9 files changed, 24698 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/itests/src/test/resources/testconfiguration.properties
--
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 1a7c0d2..65cd79a 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1266,7 +1266,15 @@ spark.query.files=add_part_multiple.q, \
   statsfs.q, \
   subquery_exists.q, \
   subquery_in.q, \
+  subquery_multi.q,\
   subquery_multiinsert.q, \
+  subquery_nested_subquery.q, \
+  subquery_notin.q,\
+  subquery_null_agg.q,\
+  subquery_scalar.q,\
+  subquery_select.q, \
+  subquery_shared_alias.q, \
+  subquery_views.q,\
   table_access_keys_stats.q, \
   temp_table.q, \
   temp_table_gb1.q, \



[3/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)

2017-10-13 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out 
b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out
new file mode 100644
index 000..470efca
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out
@@ -0,0 +1,6619 @@
+PREHOOK: query: create table tnull(i int, c char(2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tnull
+POSTHOOK: query: create table tnull(i int, c char(2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tnull
+PREHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@tnull
+POSTHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@tnull
+POSTHOOK: Lineage: tnull.c EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+POSTHOOK: Lineage: tnull.i EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+PREHOOK: query: create table tempty(c char(2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tempty
+POSTHOOK: query: create table tempty(c char(2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tempty
+PREHOOK: query: CREATE TABLE part_null(
+p_partkey INT,
+p_name STRING,
+p_mfgr STRING,
+p_brand STRING,
+p_type STRING,
+p_size INT,
+p_container STRING,
+p_retailprice DOUBLE,
+p_comment STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part_null
+POSTHOOK: query: CREATE TABLE part_null(
+p_partkey INT,
+p_name STRING,
+p_mfgr STRING,
+p_brand STRING,
+p_type STRING,
+p_size INT,
+p_container STRING,
+p_retailprice DOUBLE,
+p_comment STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part_null
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' 
overwrite into table part_null
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@part_null
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' 
overwrite into table part_null
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@part_null
+PREHOOK: query: insert into part_null 
values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED 
BAG',1464.48,'hely blith')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@part_null
+POSTHOOK: query: insert into part_null 
values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED 
BAG',1464.48,'hely blith')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@part_null
+POSTHOOK: Lineage: part_null.p_brand SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_comment SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col9, 
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_container SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col7, 
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_mfgr SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_name SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_partkey EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_retailprice EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col8, 
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_size EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, 
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_type SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, 
type:string, comment:), ]
+Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 
2' is a cross product
+PREHOOK: query: explain select * from part where p_size > (select avg(p_size) 
from part_null)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from part where p_size > (select avg(p_size) 
from part_null)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  

[5/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)

2017-10-13 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_notin.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out 
b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out
new file mode 100644
index 000..d7b9a41
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out
@@ -0,0 +1,7722 @@
+Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 
2' is a cross product
+PREHOOK: query: explain
+select * 
+from src 
+where src.key not in  
+  ( select key  from src s1 
+where s1.key > '2'
+  )
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * 
+from src 
+where src.key not in  
+  ( select key  from src s1 
+where s1.key > '2'
+  )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Spark
+  Edges:
+Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 
(PARTITION-LEVEL SORT, 1)
+Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 7 
(PARTITION-LEVEL SORT, 2)
+Reducer 5 <- Map 4 (GROUP, 1)
+Reducer 7 <- Map 6 (GROUP, 2)
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: src
+  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+  Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+Reduce Output Operator
+  sort order: 
+  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+  value expressions: _col0 (type: string), _col1 (type: 
string)
+Map 4 
+Map Operator Tree:
+TableScan
+  alias: s1
+  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+  Filter Operator
+predicate: (key > '2') (type: boolean)
+Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
+Group By Operator
+  aggregations: count(), count(key)
+  mode: hash
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 1 Data size: 16 Basic stats: 
COMPLETE Column stats: NONE
+  Reduce Output Operator
+sort order: 
+Statistics: Num rows: 1 Data size: 16 Basic stats: 
COMPLETE Column stats: NONE
+value expressions: _col0 (type: bigint), _col1 (type: 
bigint)
+Map 6 
+Map Operator Tree:
+TableScan
+  alias: s1
+  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+  Filter Operator
+predicate: (key > '2') (type: boolean)
+Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
+Group By Operator
+  keys: key (type: string)
+  mode: hash
+  outputColumnNames: _col0
+  Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
+  Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
+Reducer 2 
+Reduce Operator Tree:
+  Join Operator
+condition map:
+ Inner Join 0 to 1
+keys:
+  0 
+  1 
+outputColumnNames: _col0, _col1, _col2, _col3
+Statistics: Num rows: 500 Data size: 13812 Basic stats: 
COMPLETE Column stats: NONE
+Reduce Output Operator
+  key expressions: _col0 (type: string)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: string)
+  Statistics: Num rows: 500 Data size: 13812 Basic stats: 
COMPLETE Column stats: NONE
+  value expressions: _col1 (type: string), _col2 (type: 
bigint), _col3 (type: bigint)
+Reducer 3 
+Reduce Operator Tree:
+  Join Operator
+condition map:
+ 

[7/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)

2017-10-13 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_multi.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out 
b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out
new file mode 100644
index 000..f9b2c1b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out
@@ -0,0 +1,4129 @@
+PREHOOK: query: create table tnull(i int, c char(2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tnull
+POSTHOOK: query: create table tnull(i int, c char(2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tnull
+PREHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@tnull
+POSTHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@tnull
+POSTHOOK: Lineage: tnull.c EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+POSTHOOK: Lineage: tnull.i EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+PREHOOK: query: create table tempty(c char(2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tempty
+POSTHOOK: query: create table tempty(c char(2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tempty
+PREHOOK: query: CREATE TABLE part_null(
+p_partkey INT,
+p_name STRING,
+p_mfgr STRING,
+p_brand STRING,
+p_type STRING,
+p_size INT,
+p_container STRING,
+p_retailprice DOUBLE,
+p_comment STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part_null
+POSTHOOK: query: CREATE TABLE part_null(
+p_partkey INT,
+p_name STRING,
+p_mfgr STRING,
+p_brand STRING,
+p_type STRING,
+p_size INT,
+p_container STRING,
+p_retailprice DOUBLE,
+p_comment STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part_null
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' 
overwrite into table part_null
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@part_null
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' 
overwrite into table part_null
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@part_null
+PREHOOK: query: insert into part_null 
values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED 
BAG',1464.48,'hely blith')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@part_null
+POSTHOOK: query: insert into part_null 
values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED 
BAG',1464.48,'hely blith')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@part_null
+POSTHOOK: Lineage: part_null.p_brand SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_comment SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col9, 
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_container SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col7, 
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_mfgr SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_name SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_partkey EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_retailprice EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col8, 
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_size EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, 
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_type SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, 
type:string, comment:), ]
+PREHOOK: query: explain select * from part_null where p_size IN (select p_size 
from part_null) AND p_brand IN (select p_brand from part_null)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from part_null where p_size IN (select 
p_size from part_null) AND p_brand IN (select p_brand from part_null)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 

[1/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)

2017-10-13 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 1253450e0 -> 8eaf18d59


http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_shared_alias.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/subquery_shared_alias.q.out 
b/ql/src/test/results/clientpositive/spark/subquery_shared_alias.q.out
new file mode 100644
index 000..f907f91
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/subquery_shared_alias.q.out
@@ -0,0 +1,23 @@
+PREHOOK: query: select *
+from src
+where src.key in (select key from src where key > '9')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+ A masked pattern was here 
+POSTHOOK: query: select *
+from src
+where src.key in (select key from src where key > '9')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+ A masked pattern was here 
+92 val_92
+96 val_96
+97 val_97
+97 val_97
+90 val_90
+90 val_90
+90 val_90
+95 val_95
+95 val_95
+98 val_98
+98 val_98

http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_views.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/subquery_views.q.out 
b/ql/src/test/results/clientpositive/spark/subquery_views.q.out
new file mode 100644
index 000..9a1c25f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/subquery_views.q.out
@@ -0,0 +1,598 @@
+PREHOOK: query: create view cv1 as 
+select * 
+from src b 
+where exists
+  (select a.key 
+  from src a 
+  where b.value = a.value  and a.key = b.key and a.value > 'val_9')
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cv1
+POSTHOOK: query: create view cv1 as 
+select * 
+from src b 
+where exists
+  (select a.key 
+  from src a 
+  where b.value = a.value  and a.key = b.key and a.value > 'val_9')
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cv1
+POSTHOOK: Lineage: cv1.key SIMPLE [(src)b.FieldSchema(name:key, type:string, 
comment:default), ]
+POSTHOOK: Lineage: cv1.value SIMPLE [(src)b.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: describe extended cv1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@cv1
+POSTHOOK: query: describe extended cv1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@cv1
+key   string  
+value  string  
+
+ A masked pattern was here 
+from src b  
+where exists
+  (select a.key 
+  from src a
+  where b.value = a.value  and a.key = b.key and a.value > 'val_9'), 
viewExpandedText:select `b`.`key`, `b`.`value` 
+from `default`.`src` `b`
+where exists
+  (select `a`.`key` 
+  from `default`.`src` `a`  
+  where `b`.`value` = `a`.`value`  and `a`.`key` = `b`.`key` and `a`.`value` > 
'val_9'), tableType:VIRTUAL_VIEW, rewriteEnabled:false)  
+PREHOOK: query: select * 
+from cv1 where cv1.key in (select key from cv1 c where c.key > '95')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cv1
+PREHOOK: Input: default@src
+ A masked pattern was here 
+POSTHOOK: query: select * 
+from cv1 where cv1.key in (select key from cv1 c where c.key > '95')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cv1
+POSTHOOK: Input: default@src
+ A masked pattern was here 
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
+PREHOOK: query: create view cv2 as 
+select * 
+from src b 
+where b.key not in
+  (select a.key 
+  from src a 
+  where b.value = a.value  and a.key = b.key and a.value > 'val_11'
+  )
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cv2
+POSTHOOK: query: create view cv2 as 
+select * 
+from src b 
+where b.key not in
+  (select a.key 
+  from src a 
+  where b.value = a.value  and a.key = b.key and a.value > 'val_11'
+  )
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cv2
+POSTHOOK: Lineage: cv2.key SIMPLE [(src)b.FieldSchema(name:key, type:string, 
comment:default), ]
+POSTHOOK: Lineage: cv2.value SIMPLE [(src)b.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: describe extended cv2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@cv2
+POSTHOOK: query: describe extended cv2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@cv2
+key   string  
+value  string  
+
+ A 

[2/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)

2017-10-13 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_select.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/subquery_select.q.out 
b/ql/src/test/results/clientpositive/spark/subquery_select.q.out
new file mode 100644
index 000..c3f3d58
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/subquery_select.q.out
@@ -0,0 +1,5379 @@
+Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 
2' is a cross product
+PREHOOK: query: explain SELECT p_size, p_size IN (
+SELECT MAX(p_size) FROM part)
+FROM part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain SELECT p_size, p_size IN (
+SELECT MAX(p_size) FROM part)
+FROM part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Spark
+  Edges:
+Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 
(PARTITION-LEVEL SORT, 1)
+Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 7 
(PARTITION-LEVEL SORT, 2)
+Reducer 5 <- Map 4 (GROUP, 1)
+Reducer 7 <- Map 4 (GROUP, 1)
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: part
+  Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+  Select Operator
+expressions: p_size (type: int)
+outputColumnNames: _col0
+Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+Reduce Output Operator
+  sort order: 
+  Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+  value expressions: _col0 (type: int)
+Map 4 
+Map Operator Tree:
+TableScan
+  alias: part
+  Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+  Select Operator
+expressions: p_size (type: int)
+outputColumnNames: p_size
+Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+Group By Operator
+  aggregations: max(p_size)
+  mode: hash
+  outputColumnNames: _col0
+  Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+  Reduce Output Operator
+sort order: 
+Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+value expressions: _col0 (type: int)
+Reducer 2 
+Reduce Operator Tree:
+  Join Operator
+condition map:
+ Inner Join 0 to 1
+keys:
+  0 
+  1 
+outputColumnNames: _col0, _col1, _col2
+Statistics: Num rows: 26 Data size: 3589 Basic stats: COMPLETE 
Column stats: NONE
+Reduce Output Operator
+  key expressions: _col0 (type: int)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: int)
+  Statistics: Num rows: 26 Data size: 3589 Basic stats: 
COMPLETE Column stats: NONE
+  value expressions: _col1 (type: bigint), _col2 (type: bigint)
+Reducer 3 
+Reduce Operator Tree:
+  Join Operator
+condition map:
+ Left Outer Join 0 to 1
+keys:
+  0 _col0 (type: int)
+  1 _col0 (type: int)
+outputColumnNames: _col0, _col1, _col2, _col4
+Statistics: Num rows: 28 Data size: 3947 Basic stats: COMPLETE 
Column stats: NONE
+Select Operator
+  expressions: _col0 (type: int), CASE WHEN ((_col1 = 0)) THEN 
(false) WHEN (_col4 is not null) THEN (true) WHEN (_col0 is null) THEN (null) 
WHEN ((_col2 < _col1)) THEN (null) ELSE (false) END (type: boolean)
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 28 Data size: 3947 Basic stats: 
COMPLETE Column stats: NONE
+  File Output Operator
+compressed: false
+Statistics: Num rows: 28 Data size: 3947 Basic stats: 
COMPLETE Column stats: NONE
+table:
+input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: 

hive git commit: HIVE-17401: Hive session idle timeout doesn't function properly (Reviewed by Peter Vary)

2017-09-06 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 9a5381cb9 -> 660e39e03


HIVE-17401: Hive session idle timeout doesn't function properly (Reviewed by 
Peter Vary)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/660e39e0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/660e39e0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/660e39e0

Branch: refs/heads/master
Commit: 660e39e03b68f6d256a6d4fd41193503a1f711c9
Parents: 9a5381c
Author: Xuefu Zhang <xu...@uber.com>
Authored: Wed Sep 6 10:06:01 2017 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Wed Sep 6 10:06:01 2017 -0700

--
 .../service/cli/session/HiveSessionImpl.java| 26 
 1 file changed, 10 insertions(+), 16 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/660e39e0/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java
--
diff --git 
a/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java 
b/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java
index 57bb53c..906565c 100644
--- a/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java
+++ b/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java
@@ -111,9 +111,8 @@ public class HiveSessionImpl implements HiveSession {
   // TODO: the control flow for this needs to be defined. Hive is supposed to 
be thread-local.
   private Hive sessionHive;
 
-  private volatile long lastAccessTime;
-  private volatile long lastIdleTime;
-  private volatile int activeCalls = 0;
+  private volatile long lastAccessTime = System.currentTimeMillis();
+  private volatile boolean lockedByUser;
   private final Semaphore operationLock;
 
 
@@ -184,7 +183,6 @@ public class HiveSessionImpl implements HiveSession {
   configureSession(sessionConfMap);
 }
 lastAccessTime = System.currentTimeMillis();
-lastIdleTime = lastAccessTime;
   }
 
 /**
@@ -384,12 +382,11 @@ public class HiveSessionImpl implements HiveSession {
 
sessionState.setIsUsingThriftJDBCBinarySerDe(updateIsUsingThriftJDBCBinarySerDe());
 if (userAccess) {
   lastAccessTime = System.currentTimeMillis();
+  lockedByUser = true;
 }
 // set the thread name with the logging prefix.
 sessionState.updateThreadName();
 Hive.set(sessionHive);
-activeCalls++;
-lastIdleTime = 0;
   }
 
   /**
@@ -424,12 +421,7 @@ public class HiveSessionImpl implements HiveSession {
 }
 if (userAccess) {
   lastAccessTime = System.currentTimeMillis();
-}
-activeCalls--;
-// lastIdleTime is only set by the last one
-// who calls release with empty opHandleSet.
-if (activeCalls == 0 && opHandleSet.isEmpty()) {
-  lastIdleTime = System.currentTimeMillis();
+  lockedByUser = false;
 }
   }
 
@@ -830,16 +822,18 @@ public class HiveSessionImpl implements HiveSession {
 
   @Override
   public long getNoOperationTime() {
-return lastIdleTime > 0 ? System.currentTimeMillis() - lastIdleTime : 0;
+boolean noMoreOpHandle = false;
+synchronized (opHandleSet) {
+  noMoreOpHandle = opHandleSet.isEmpty();
+}
+return noMoreOpHandle && !lockedByUser ? System.currentTimeMillis() - 
lastAccessTime : 0;
   }
 
  private void closeTimedOutOperations(List<Operation> operations) {
 acquire(false, false);
 try {
   for (Operation operation : operations) {
-synchronized (opHandleSet) {
-  opHandleSet.remove(operation.getHandle());
-}
+removeOpHandle(operation.getHandle());
 try {
   operation.close();
 } catch (Exception e) {
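
The patch replaces the fragile activeCalls counter and lastIdleTime
bookkeeping with a single invariant: a session is idle exactly when it holds
no operation handles and no user call currently has it acquired, and the idle
duration is measured from lastAccessTime. A compact sketch of that invariant
(field names follow the patch; the wrapper class and Object handles are
illustrative):

    import java.util.Collections;
    import java.util.HashSet;
    import java.util.Set;

    class IdleTracker {
      private volatile long lastAccessTime = System.currentTimeMillis();
      private volatile boolean lockedByUser;  // true between acquire()/release()
      private final Set<Object> opHandleSet =
          Collections.synchronizedSet(new HashSet<>());

      // 0 while any operation is open or a user call holds the session;
      // otherwise the time elapsed since the last user access.
      long getNoOperationTime() {
        boolean noMoreOpHandle;
        synchronized (opHandleSet) {
          noMoreOpHandle = opHandleSet.isEmpty();
        }
        return noMoreOpHandle && !lockedByUser
            ? System.currentTimeMillis() - lastAccessTime : 0;
      }
    }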



hive git commit: HIVE-16961: Hive on Spark leaks spark application in case user cancels query and closes session (reviewed by Rui)

2017-07-05 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 26f1bdeb4 -> 0731dab18


HIVE-16961: Hive on Spark leaks spark application in case user cancels query 
and closes session (reviewed by Rui)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0731dab1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0731dab1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0731dab1

Branch: refs/heads/master
Commit: 0731dab18c85363d4bad8a556c437a587277143c
Parents: 26f1bde
Author: Xuefu Zhang <xu...@uber.com>
Authored: Wed Jul 5 10:33:18 2017 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Wed Jul 5 10:33:18 2017 -0700

--
 .../org/apache/hive/spark/client/SparkClientImpl.java| 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/0731dab1/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
--
diff --git 
a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java 
b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
index bf7e8db..03e773a 100644
--- 
a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
+++ 
b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
@@ -508,16 +508,19 @@ class SparkClientImpl implements SparkClient {
 }
   }
 
-  rpcServer.cancelClient(clientId,
-  "Child process exited before connecting back with error log 
" + errStr.toString());
   LOG.warn("Child process exited with code {}", exitCode);
+  rpcServer.cancelClient(clientId,
+  "Child process (spark-submit) exited before connecting back 
with error log " + errStr.toString());
 }
   } catch (InterruptedException ie) {
-LOG.warn("Waiting thread interrupted, killing child process.");
+LOG.warn("Thread waiting on the child process (spark-submit) is 
interrupted, killing the child process.");
+rpcServer.cancelClient(clientId, "Thread waiting on the child 
porcess (spark-submit) is interrupted");
 Thread.interrupted();
 child.destroy();
   } catch (Exception e) {
-LOG.warn("Exception while waiting for child process.", e);
+String errMsg = "Exception while waiting for child process 
(spark-submit)";
+LOG.warn(errMsg, e);
+rpcServer.cancelClient(clientId, errMsg);
   }
 }
   };
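
The reordering and the added cancelClient() calls above close the leak: every
abnormal exit from the wait on the spark-submit child process now cancels the
pending client registration, so the launched Spark application is reaped
instead of lingering. A sketch of the enforced pattern, with a hypothetical
stub interface standing in for Hive's RPC server:

    // Illustrative sketch, not Hive's API: each failure path must cancel the
    // client so the spark-submit application does not leak.
    class ChildWatcher {
      interface RpcStub { void cancelClient(String clientId, String msg); }

      static void waitForChild(Process child, RpcStub rpcServer, String clientId) {
        try {
          int exitCode = child.waitFor();
          if (exitCode != 0) {
            rpcServer.cancelClient(clientId,
                "Child process (spark-submit) exited with code " + exitCode);
          }
        } catch (InterruptedException ie) {
          rpcServer.cancelClient(clientId,
              "Thread waiting on the child process (spark-submit) is interrupted");
          Thread.interrupted();  // clear the interrupt flag, as the patch does
          child.destroy();       // kill the child rather than leak it
        } catch (Exception e) {
          rpcServer.cancelClient(clientId,
              "Exception while waiting for child process (spark-submit)");
        }
      }
    }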



hive git commit: HIVE-16962: Better error msg for Hive on Spark in case user cancels query and closes session (reviewed by Chao)

2017-06-29 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 539896482 -> 10944ee34


HIVE-16962: Better error msg for Hive on Spark in case user cancels query and 
closes session (reviewed by Chao)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/10944ee3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/10944ee3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/10944ee3

Branch: refs/heads/master
Commit: 10944ee34a39efc0503ca917d1153751e1d495d2
Parents: 5398964
Author: Xuefu Zhang <xu...@uber.com>
Authored: Thu Jun 29 10:01:05 2017 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Thu Jun 29 10:01:05 2017 -0700

--
 .../hive/ql/exec/spark/session/SparkSessionImpl.java  |  9 -
 .../org/apache/hive/spark/client/SparkClientImpl.java | 14 ++
 2 files changed, 18 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/10944ee3/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java
index 51c6715..8224ef9 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java
@@ -56,13 +56,18 @@ public class SparkSessionImpl implements SparkSession {
 
   @Override
   public void open(HiveConf conf) throws HiveException {
+LOG.info("Trying to open Spark session {}", sessionId);
 this.conf = conf;
 isOpen = true;
 try {
   hiveSparkClient = HiveSparkClientFactory.createHiveSparkClient(conf);
 } catch (Throwable e) {
-  throw new HiveException("Failed to create spark client.", e);
+  // It's possible that user session is closed while creating Spark client.
+  String msg = isOpen ? "Failed to create Spark client for Spark session " 
+ sessionId :
+"Spark Session " + sessionId + " is closed before Spark client is 
created";
+  throw new HiveException(msg, e);
 }
+LOG.info("Spark session {} is successfully opened", sessionId);
   }
 
   @Override
@@ -121,10 +126,12 @@ public class SparkSessionImpl implements SparkSession {
 
   @Override
   public void close() {
+LOG.info("Trying to close Spark session {}", sessionId);
 isOpen = false;
 if (hiveSparkClient != null) {
   try {
 hiveSparkClient.close();
+LOG.info("Spark session {} is successfully closed", sessionId);
 cleanScratchDir();
   } catch (IOException e) {
 LOG.error("Failed to close spark session (" + sessionId + ").", e);

http://git-wip-us.apache.org/repos/asf/hive/blob/10944ee3/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
--
diff --git 
a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java 
b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
index e40aa6b..bf7e8db 100644
--- 
a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
+++ 
b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
@@ -107,19 +107,25 @@ class SparkClientImpl implements SparkClient {
   // The RPC server will take care of timeouts here.
   this.driverRpc = rpcServer.registerClient(clientId, secret, 
protocol).get();
 } catch (Throwable e) {
+  String errorMsg = null;
   if (e.getCause() instanceof TimeoutException) {
-LOG.error("Timed out waiting for client to connect.\nPossible reasons 
include network " +
+errorMsg = "Timed out waiting for client to connect.\nPossible reasons 
include network " +
 "issues, errors in remote driver or the cluster has no available 
resources, etc." +
-"\nPlease check YARN or Spark driver's logs for further 
information.", e);
+"\nPlease check YARN or Spark driver's logs for further 
information.";
+  } else if (e.getCause() instanceof InterruptedException) {
+errorMsg = "Interruption occurred while waiting for client to 
connect.\nPossibly the Spark session is closed " +
+"such as in case of query cancellation." +
+"\nPlease refer to HiveServer2 logs for further information.";
   } else {
-LOG.error("Error while waiting for client to connect.", e);
+errorMsg = "Error while w
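
The refactoring above collects the three failure modes into a single errorMsg
that is both logged and handed to rpcServer.cancelClient(). A simplified
sketch of the cause classification (illustrative, not the verbatim patch):

    import java.util.concurrent.TimeoutException;

    class ConnectFailureMessages {
      // Pick a targeted message from the failure cause before reporting it.
      static String messageFor(Throwable e) {
        Throwable cause = e.getCause();
        if (cause instanceof TimeoutException) {
          return "Timed out waiting for client to connect; check YARN or "
              + "Spark driver logs.";
        } else if (cause instanceof InterruptedException) {
          // New in this patch: interruption usually means the Spark session
          // was closed, e.g. on query cancellation.
          return "Interrupted while waiting for client to connect; see "
              + "HiveServer2 logs.";
        }
        return "Error while waiting for client to connect.";
      }
    }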

[1/2] hive git commit: HIVE-16799: Control the max number of task for a stage in a spark job (Reviewed by Rui)

2017-06-02 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 690a9f8e2 -> 788d486e8


HIVE-16799: Control the max number of task for a stage in a spark job (Reviewed 
by Rui)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/363ffe0a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/363ffe0a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/363ffe0a

Branch: refs/heads/master
Commit: 363ffe0ac7dec7e4804c1eb2ba76cb07660ae020
Parents: b560f49
Author: Xuefu Zhang <xu...@uber.com>
Authored: Fri Jun 2 11:26:33 2017 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Fri Jun 2 11:26:33 2017 -0700

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  2 +
 .../test/resources/testconfiguration.properties |  3 +-
 .../hadoop/hive/cli/control/CliConfigs.java |  1 +
 .../hadoop/hive/ql/exec/spark/SparkTask.java|  2 +-
 .../spark/status/RemoteSparkJobMonitor.java | 14 
 .../ql/exec/spark/status/SparkJobMonitor.java   | 12 +++
 .../clientnegative/spark_stage_max_tasks.q  |  6 ++
 .../spark/spark_stage_max_tasks.q.out   | 77 
 8 files changed, 115 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/363ffe0a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 176d36f..fce8db3 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3371,6 +3371,8 @@ public class HiveConf extends Configuration {
 "Turn this off when there is a memory issue."),
 SPARK_JOB_MAX_TASKS("hive.spark.job.max.tasks", -1, "The maximum number of 
tasks a Spark job may have.\n" +
 "If a Spark job contains more tasks than the maximum, it will be 
cancelled. A value of -1 means no limit."),
+SPARK_STAGE_MAX_TASKS("hive.spark.stage.max.tasks", -1, "The maximum 
number of tasks a stage in a Spark job may have.\n" +
+"If a Spark job stage contains more tasks than the maximum, the job 
will be cancelled. A value of -1 means no limit."),
 NWAYJOINREORDER("hive.reorder.nway.joins", true,
   "Runs reordering of tables within single n-way join (i.e.: picks 
streamtable)"),
 HIVE_MERGE_NWAY_JOINS("hive.merge.nway.joins", true,
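
Together with hive.spark.job.max.tasks shown in the surrounding context, the
new hive.spark.stage.max.tasks knob lets operators bound runaway queries. A
minimal sketch of setting both limits programmatically; the values are
illustrative, and -1 (the default) means no limit:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class SparkTaskLimits {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Cancel any Spark job that would run more than 5000 tasks overall...
        conf.setIntVar(HiveConf.ConfVars.SPARK_JOB_MAX_TASKS, 5000);
        // ...or more than 1000 tasks within any single stage.
        conf.setIntVar(HiveConf.ConfVars.SPARK_STAGE_MAX_TASKS, 1000);
      }
    }

A session can also set the same limits with plain "set" statements, much as
the new spark_stage_max_tasks.q negative test exercises the limit.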

http://git-wip-us.apache.org/repos/asf/hive/blob/363ffe0a/itests/src/test/resources/testconfiguration.properties
--
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index e613374..62462bd 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1459,5 +1459,6 @@ 
spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\
   groupby3_map_skew_multi_distinct.q,\
   groupby3_multi_distinct.q,\
   groupby_grouping_sets7.q,\
-  spark_job_max_tasks.q
+  spark_job_max_tasks.q,\
+  spark_stage_max_tasks.q
 

http://git-wip-us.apache.org/repos/asf/hive/blob/363ffe0a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
--
diff --git 
a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java 
b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
index 1457db0..27b87fb 100644
--- 
a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
+++ 
b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
@@ -288,6 +288,7 @@ public class CliConfigs {
 excludesFrom(testConfigProps, "minimr.query.negative.files");
 excludeQuery("authorization_uri_import.q");
 excludeQuery("spark_job_max_tasks.q");
+excludeQuery("spark_stage_max_tasks.q");
 
 setResultsDir("ql/src/test/results/clientnegative");
 setLogDir("itests/qtest/target/qfile-results/clientnegative");

http://git-wip-us.apache.org/repos/asf/hive/blob/363ffe0a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
index b4fb49f..2ee8c93 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
+++ b/ql/src/

[2/2] hive git commit: Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/hive

2017-06-02 Thread xuefu
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/hive


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/788d486e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/788d486e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/788d486e

Branch: refs/heads/master
Commit: 788d486e8fbf58919d04f15e965050f1e885093f
Parents: 363ffe0 690a9f8
Author: Xuefu Zhang <xu...@uber.com>
Authored: Fri Jun 2 11:27:38 2017 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Fri Jun 2 11:27:38 2017 -0700

--
 bin/ext/version.sh  |  2 +-
 bin/hive| 20 ++--
 .../hive/http/Log4j2ConfiguratorServlet.java| 18 +++---
 .../hadoop/hive/metastore/HiveAlterHandler.java |  4 ++--
 .../hadoop/hive/metastore/MetaStoreUtils.java   | 14 +++---
 .../hive/metastore/TestMetaStoreUtils.java  | 16 +---
 .../fast/VectorMapJoinFastHashTable.java|  3 ++-
 7 files changed, 38 insertions(+), 39 deletions(-)
--




hive git commit: HIVE-16456: Kill spark job when InterruptedException happens or driverContext.isShutdown is true (Zhihai via Xuefu)

2017-05-09 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 067d953bf -> 4ba48aa5f


HIVE-16456: Kill spark job when InterruptedException happens or 
driverContext.isShutdown is true (Zhihai via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4ba48aa5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4ba48aa5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4ba48aa5

Branch: refs/heads/master
Commit: 4ba48aa5fcaa981ee469161bbf17611aa0392fd2
Parents: 067d953
Author: Xuefu Zhang <xu...@uber.com>
Authored: Tue May 9 09:40:13 2017 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Tue May 9 09:40:13 2017 -0700

--
 .../hadoop/hive/ql/exec/spark/SparkTask.java| 32 +---
 1 file changed, 28 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/4ba48aa5/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
index 98b1605..b4fb49f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
@@ -85,6 +85,7 @@ public class SparkTask extends Task<SparkWork> {
   private transient List<Integer> stageIds;
   private transient SparkJobRef jobRef = null;
   private transient boolean isShutdown = false;
+  private transient boolean jobKilled = false;
 
   @Override
   public void initialize(QueryState queryState, QueryPlan queryPlan, 
DriverContext driverContext,
@@ -112,6 +113,11 @@ public class SparkTask extends Task {
   jobRef = sparkSession.submit(driverContext, sparkWork);
   perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_SUBMIT_JOB);
 
+  if (driverContext.isShutdown()) {
+killJob();
+throw new HiveException("Operation is cancelled.");
+  }
+
   addToHistory(jobRef);
   sparkJobID = jobRef.getJobId();
   this.jobID = jobRef.getSparkJobStatus().getAppID();
@@ -130,11 +136,11 @@ public class SparkTask extends Task {
 // ideally also cancel the app request here. But w/o facilities from 
Spark or YARN,
 // it's difficult to do it on hive side alone. See HIVE-12650.
 LOG.info("Failed to submit Spark job " + sparkJobID);
-jobRef.cancelJob();
+killJob();
   } else if (rc == 4) {
 LOG.info("The number of tasks reaches above the limit " + 
conf.getIntVar(HiveConf.ConfVars.SPARK_JOB_MAX_TASKS) +
 ". Cancelling Spark job " + sparkJobID + " with application ID " + 
jobID );
-jobRef.cancelJob();
+killJob();
   }
 
   if (this.jobID == null) {
@@ -305,14 +311,27 @@ public class SparkTask extends Task {
   @Override
   public void shutdown() {
 super.shutdown();
-if (jobRef != null && !isShutdown) {
+killJob();
+isShutdown = true;
+  }
+
+  private void killJob() {
+boolean needToKillJob = false;
+if (jobRef != null && !jobKilled) {
+  synchronized (this) {
+if (!jobKilled) {
+  jobKilled = true;
+  needToKillJob = true;
+}
+  }
+}
+if (needToKillJob) {
   try {
 jobRef.cancelJob();
   } catch (Exception e) {
 LOG.warn("failed to kill job", e);
   }
 }
-isShutdown = true;
   }
 
   /**
@@ -393,6 +412,11 @@ public class SparkTask extends Task {
   if (rc != 0) {
 Throwable error = sparkJobStatus.getError();
 if (error != null) {
+  if ((error instanceof InterruptedException) ||
+  (error instanceof HiveException &&
+  error.getCause() instanceof InterruptedException)) {
+killJob();
+  }
   setException(error);
 }
   }
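
A note on the killJob() hunk above: it is a check-then-act guard, so the flag flip and the
decision to cancel happen together under synchronization, while jobRef.cancelJob() itself runs
outside the lock. The same idiom in isolation, as a minimal sketch (the Runnable stands in for
jobRef.cancelJob(); this is not Hive API):

import java.util.concurrent.atomic.AtomicBoolean;

// Idempotent cancellation: shutdown() and the monitor path may both ask
// for a kill, but the underlying cancel must run at most once.
class OnceKiller {
  private final AtomicBoolean killed = new AtomicBoolean(false);
  private final Runnable cancelAction; // placeholder for jobRef.cancelJob()

  OnceKiller(Runnable cancelAction) {
    this.cancelAction = cancelAction;
  }

  void kill() {
    // compareAndSet is the lock-free equivalent of the patch's
    // synchronized check-then-set on the jobKilled flag.
    if (killed.compareAndSet(false, true)) {
      try {
        cancelAction.run();
      } catch (Exception e) {
        // mirror the patch: log and move on, the job may already be gone
        System.err.println("failed to kill job: " + e);
      }
    }
  }
}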



hive git commit: HIVE-16552: Limit the number of tasks a Spark job may contain (Reviewed by Rui)

2017-05-04 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 9e9356b5e -> c6b5ad663


HIVE-16552: Limit the number of tasks a Spark job may contain (Reviewed by Rui)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c6b5ad66
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c6b5ad66
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c6b5ad66

Branch: refs/heads/master
Commit: c6b5ad663d235c15fc5bb5a24a1d3e9ac0d05140
Parents: 9e9356b
Author: Xuefu Zhang <xu...@uber.com>
Authored: Thu May 4 09:31:28 2017 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Thu May 4 09:31:28 2017 -0700

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  2 +
 .../test/resources/testconfiguration.properties |  4 +-
 .../hadoop/hive/cli/control/CliConfigs.java |  1 +
 .../hadoop/hive/ql/exec/spark/SparkTask.java|  6 ++
 .../spark/status/RemoteSparkJobMonitor.java | 15 +++-
 .../ql/exec/spark/status/SparkJobMonitor.java   | 10 ++-
 .../clientnegative/spark_job_max_tasks.q|  6 ++
 .../spark/spark_job_max_tasks.q.out | 77 
 8 files changed, 118 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/c6b5ad66/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 84398c6..99c26ce 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3350,6 +3350,8 @@ public class HiveConf extends Configuration {
 "hive.spark.use.groupby.shuffle", true,
 "Spark groupByKey transformation has better performance but uses 
unbounded memory." +
 "Turn this off when there is a memory issue."),
+SPARK_JOB_MAX_TASKS("hive.spark.job.max.tasks", -1, "The maximum number of 
tasks a Spark job may have.\n" +
+"If a Spark job contains more tasks than the maximum, it will be 
cancelled. A value of -1 means no limit."),
 NWAYJOINREORDER("hive.reorder.nway.joins", true,
   "Runs reordering of tables within single n-way join (i.e.: picks 
streamtable)"),
 HIVE_MERGE_NWAY_JOINS("hive.merge.nway.joins", true,

http://git-wip-us.apache.org/repos/asf/hive/blob/c6b5ad66/itests/src/test/resources/testconfiguration.properties
--
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 753f3a9..5ab3076 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1445,4 +1445,6 @@ 
spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\
   groupby2_multi_distinct.q,\
   groupby3_map_skew_multi_distinct.q,\
   groupby3_multi_distinct.q,\
-  groupby_grouping_sets7.q
+  groupby_grouping_sets7.q,\
+  spark_job_max_tasks.q
+

http://git-wip-us.apache.org/repos/asf/hive/blob/c6b5ad66/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
--
diff --git 
a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java 
b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
index 67064b8..1457db0 100644
--- 
a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
+++ 
b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
@@ -287,6 +287,7 @@ public class CliConfigs {
 
 excludesFrom(testConfigProps, "minimr.query.negative.files");
 excludeQuery("authorization_uri_import.q");
+excludeQuery("spark_job_max_tasks.q");
 
 setResultsDir("ql/src/test/results/clientnegative");
 setLogDir("itests/qtest/target/qfile-results/clientnegative");

http://git-wip-us.apache.org/repos/asf/hive/blob/c6b5ad66/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
index 32a7730..98b1605 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
@@ -129,8 +129,14 @@ public class SparkTask extends Task<SparkWork> {
 // TODO: If the timeout is because of lack of resources in the 
cluste
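
(The hunk above is cut off in the archive.) The enforcement half of this change sits in
RemoteSparkJobMonitor, per the file list: while polling job state it compares the job's task
count against hive.spark.job.max.tasks and, when the limit is exceeded, returns rc 4, which the
SparkTask hunk above translates into killJob(). The decision itself reduces to a check like the
following sketch (a standalone restatement, not the monitor's actual code):

// -1 disables the limit, matching the HiveConf default above.
static boolean exceedsTaskLimit(int totalTaskCount, int configuredMax) {
  return configuredMax > -1 && totalTaskCount > configuredMax;
}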

[1/3] hive git commit: HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)

2017-05-02 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 812fa3946 -> 00b644482


http://git-wip-us.apache.org/repos/asf/hive/blob/00b64448/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out 
b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
new file mode 100644
index 000..ca0910a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
@@ -0,0 +1,5921 @@
+PREHOOK: query: explain create table src_orc_merge_test_part(key int, value 
string) partitioned by (ds string, ts string) stored as orc
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: explain create table src_orc_merge_test_part(key int, value 
string) partitioned by (ds string, ts string) stored as orc
+POSTHOOK: type: CREATETABLE
+Stage-0
+  Create Table Operator:
+name:default.src_orc_merge_test_part
+
+PREHOOK: query: create table src_orc_merge_test_part(key int, value string) 
partitioned by (ds string, ts string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_orc_merge_test_part
+POSTHOOK: query: create table src_orc_merge_test_part(key int, value string) 
partitioned by (ds string, ts string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_orc_merge_test_part
+PREHOOK: query: alter table src_orc_merge_test_part add partition 
(ds='2012-01-03', ts='2012-01-03+14:46:31')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@src_orc_merge_test_part
+POSTHOOK: query: alter table src_orc_merge_test_part add partition 
(ds='2012-01-03', ts='2012-01-03+14:46:31')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@src_orc_merge_test_part
+POSTHOOK: Output: 
default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31
+PREHOOK: query: desc extended src_orc_merge_test_part partition 
(ds='2012-01-03', ts='2012-01-03+14:46:31')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_orc_merge_test_part
+POSTHOOK: query: desc extended src_orc_merge_test_part partition 
(ds='2012-01-03', ts='2012-01-03+14:46:31')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_orc_merge_test_part
+key  int 
+value  string  
+ds string  
+ts string  
+
+# Partition Information 
+# col_name data_type   comment 
+
+ds string  
+ts string  
+
+ A masked pattern was here 
+PREHOOK: query: explain insert overwrite table src_orc_merge_test_part 
partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src
+PREHOOK: type: QUERY
+POSTHOOK: query: explain insert overwrite table src_orc_merge_test_part 
partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Stage-2
+  Stats-Aggr Operator
+Stage-0
+  Move Operator
+table:{"name:":"default.src_orc_merge_test_part"}
+Stage-1
+  Map 1
+  File Output Operator [FS_3]
+table:{"name:":"default.src_orc_merge_test_part"}
+Select Operator [SEL_1] (rows=500 width=10)
+  Output:["_col0","_col1"]
+  TableScan [TS_0] (rows=500 width=10)
+default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+
+PREHOOK: query: insert overwrite table src_orc_merge_test_part 
partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: 
default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31
+POSTHOOK: query: insert overwrite table src_orc_merge_test_part 
partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: 
default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31
+POSTHOOK: Lineage: src_orc_merge_test_part 
PARTITION(ds=2012-01-03,ts=2012-01-03+14:46:31).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_orc_merge_test_part 
PARTITION(ds=2012-01-03,ts=2012-01-03+14:46:31).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain insert into table src_orc_merge_test_part 
partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain insert into table src_orc_merge_test_part 
partition(ds='2012-01-03', 

[3/3] hive git commit: HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)

2017-05-02 Thread xuefu
HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/00b64448
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/00b64448
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/00b64448

Branch: refs/heads/master
Commit: 00b644482656da9fb40788744e692f4e677b4c0d
Parents: 812fa39
Author: Xuefu Zhang <xu...@uber.com>
Authored: Tue May 2 10:28:37 2017 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Tue May 2 10:28:37 2017 -0700

--
 .../hive/common/jsonexplain/Connection.java |   35 +
 .../hive/common/jsonexplain/DagJsonParser.java  |  167 +
 .../common/jsonexplain/DagJsonParserUtils.java  |   53 +
 .../common/jsonexplain/JsonParserFactory.java   |4 +
 .../hadoop/hive/common/jsonexplain/Op.java  |  358 ++
 .../hadoop/hive/common/jsonexplain/Printer.java |   41 +
 .../hadoop/hive/common/jsonexplain/Stage.java   |  262 +
 .../hadoop/hive/common/jsonexplain/Vertex.java  |  323 +
 .../jsonexplain/spark/SparkJsonParser.java  |   35 +
 .../hive/common/jsonexplain/tez/Connection.java |   35 -
 .../hadoop/hive/common/jsonexplain/tez/Op.java  |  356 --
 .../hive/common/jsonexplain/tez/Printer.java|   41 -
 .../hive/common/jsonexplain/tez/Stage.java  |  262 -
 .../common/jsonexplain/tez/TezJsonParser.java   |  153 +-
 .../jsonexplain/tez/TezJsonParserUtils.java |   53 -
 .../hive/common/jsonexplain/tez/Vertex.java |  334 -
 .../org/apache/hadoop/hive/conf/HiveConf.java   |5 +-
 .../test/resources/testconfiguration.properties |1 +
 .../hadoop/hive/ql/optimizer/Optimizer.java |2 +-
 .../hive/ql/parse/ExplainSemanticAnalyzer.java  |   16 +-
 .../apache/hadoop/hive/ql/plan/SparkWork.java   |   10 +-
 .../clientpositive/spark_explainuser_1.q|  671 ++
 .../spark/spark_explainuser_1.q.out | 5921 ++
 23 files changed, 7915 insertions(+), 1223 deletions(-)
--
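
The shape of the change is a template-method extraction: the Tez-specific parser under
jsonexplain/tez is hoisted into an engine-neutral DagJsonParser (plus Op, Stage, Vertex,
Connection, Printer), and each engine supplies only what differs, such as edge naming. In
miniature, with illustrative method bodies (only DagJsonParser and mapEdgeType are real names
from this commit):

// Shared DAG-printing logic lives in the base class; engines override
// only the engine-specific pieces.
abstract class DagParserSketch {
  abstract String mapEdgeType(String edgeName); // engine-specific naming

  final void printEdge(String fromVertex, String edgeName) {
    System.out.println(fromVertex + " (" + mapEdgeType(edgeName) + ")");
  }
}

class TezSketch extends DagParserSketch {
  @Override
  String mapEdgeType(String edgeName) {
    // e.g. "BROADCAST_EDGE" -> "BROADCAST"; see the real switch further down
    return edgeName.endsWith("_EDGE")
        ? edgeName.substring(0, edgeName.length() - "_EDGE".length())
        : edgeName;
  }
}

class SparkSketch extends DagParserSketch {
  @Override
  String mapEdgeType(String edgeName) {
    return edgeName; // pass-through in this sketch
  }
}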


http://git-wip-us.apache.org/repos/asf/hive/blob/00b64448/common/src/java/org/apache/hadoop/hive/common/jsonexplain/Connection.java
--
diff --git 
a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/Connection.java 
b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/Connection.java
new file mode 100644
index 000..0df6f4c
--- /dev/null
+++ b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/Connection.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.jsonexplain;
+
+public final class Connection implements Comparable<Connection> {
+  public final String type;
+  public final Vertex from;
+
+  public Connection(String type, Vertex from) {
+super();
+this.type = type;
+this.from = from;
+  }
+
+  @Override
+  public int compareTo(Connection o) {
+return from.compareTo(o.from);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/00b64448/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java
--
diff --git 
a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java 
b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java
new file mode 100644
index 000..1f01685
--- /dev/null
+++ 
b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java
@@ -0,0 +1,167 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to 

[2/3] hive git commit: HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)

2017-05-02 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/00b64448/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java
--
diff --git 
a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java
 
b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java
index ea86048..294dc6b 100644
--- 
a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java
+++ 
b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java
@@ -18,146 +18,29 @@
 
 package org.apache.hadoop.hive.common.jsonexplain.tez;
 
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
+import org.apache.hadoop.hive.common.jsonexplain.DagJsonParser;
 
-import org.apache.hadoop.hive.common.jsonexplain.JsonParser;
-import org.json.JSONObject;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
-public final class TezJsonParser implements JsonParser {
-  public final Map<String, Stage> stages = new LinkedHashMap<>();
-  protected final Logger LOG;
-  // the objects that have been printed.
-  public final Set<Object> printSet = new LinkedHashSet<>();
-  // the vertex that should be inlined. <Operator, list of Vertex that is inlined>
-  public final Map<Op, List<Connection>> inlineMap = new LinkedHashMap<>();
-
-  public TezJsonParser() {
-super();
-LOG = LoggerFactory.getLogger(this.getClass().getName());
-  }
-
-  public void extractStagesAndPlans(JSONObject inputObject) throws Exception {
-// extract stages
-JSONObject dependency = inputObject.getJSONObject("STAGE DEPENDENCIES");
-if (dependency != null && dependency.length() > 0) {
-  // iterate for the first time to get all the names of stages.
-  for (String stageName : JSONObject.getNames(dependency)) {
-this.stages.put(stageName, new Stage(stageName, this));
-  }
-  // iterate for the second time to get all the dependency.
-  for (String stageName : JSONObject.getNames(dependency)) {
-JSONObject dependentStageNames = dependency.getJSONObject(stageName);
-this.stages.get(stageName).addDependency(dependentStageNames, 
this.stages);
-  }
-}
-// extract stage plans
-JSONObject stagePlans = inputObject.getJSONObject("STAGE PLANS");
-if (stagePlans != null && stagePlans.length() > 0) {
-  for (String stageName : JSONObject.getNames(stagePlans)) {
-JSONObject stagePlan = stagePlans.getJSONObject(stageName);
-this.stages.get(stageName).extractVertex(stagePlan);
-  }
-}
-  }
-
-  /**
-   * @param indentFlag
-   *  help to generate correct indent
-   * @return
-   */
-  public static String prefixString(int indentFlag) {
-StringBuilder sb = new StringBuilder();
-for (int index = 0; index < indentFlag; index++) {
-  sb.append("  ");
-}
-return sb.toString();
-  }
-
-  /**
-   * @param indentFlag
-   * @param tail
-   *  help to generate correct indent with a specific tail
-   * @return
-   */
-  public static String prefixString(int indentFlag, String tail) {
-StringBuilder sb = new StringBuilder();
-for (int index = 0; index < indentFlag; index++) {
-  sb.append("  ");
-}
-int len = sb.length();
-return sb.replace(len - tail.length(), len, tail).toString();
-  }
+public class TezJsonParser extends DagJsonParser {
 
   @Override
-  public void print(JSONObject inputObject, PrintStream outputStream) throws 
Exception {
-LOG.info("JsonParser is parsing:" + inputObject.toString());
-this.extractStagesAndPlans(inputObject);
-Printer printer = new Printer();
-// print out the cbo info
-if (inputObject.has("cboInfo")) {
-  printer.println(inputObject.getString("cboInfo"));
-  printer.println();
-}
-// print out the vertex dependency in root stage
-for (Stage candidate : this.stages.values()) {
-  if (candidate.tezStageDependency != null && 
candidate.tezStageDependency.size() > 0) {
-printer.println("Vertex dependency in root stage");
-for (Entry entry : 
candidate.tezStageDependency.entrySet()) {
-  StringBuilder sb = new StringBuilder();
-  sb.append(entry.getKey().name);
-  sb.append(" <- ");
-  boolean printcomma = false;
-  for (Connection connection : entry.getValue()) {
-if (printcomma) {
-  sb.append(", ");
-} else {
-  printcomma = true;
-}
-sb.append(connection.from.name + " (" + connection.type + ")");
-  }
-  printer.println(sb.toString());
-}
-printer.println();
-  }
+  public String mapEdgeType(String edgeName) {
+switch (edgeName) {
+  case "BROADCAST_EDGE":
+ 

hive git commit: HIVE-16524: Remove the redundant item type in hiveserver2.jsp and QueryProfileTmpl.jamon (ZhangBing via Xuefu)

2017-05-01 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 79e3c5a8d -> 5ab03cba5


HIVE-16524: Remove the redundant item type in hiveserver2.jsp and 
QueryProfileTmpl.jamon (ZhangBing via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5ab03cba
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5ab03cba
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5ab03cba

Branch: refs/heads/master
Commit: 5ab03cba5999de0c95e24aafad074099231297bc
Parents: 79e3c5a
Author: Xuefu Zhang <xu...@uber.com>
Authored: Mon May 1 18:27:53 2017 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Mon May 1 18:27:53 2017 -0700

--
 .../src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon | 10 +-
 .../resources/hive-webapps/hiveserver2/hiveserver2.jsp|  8 
 2 files changed, 9 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/5ab03cba/service/src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon
--
diff --git a/service/src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon 
b/service/src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon
index 07aa3c1..fa69eb2 100644
--- a/service/src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon
+++ b/service/src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon
@@ -169,7 +169,7 @@ org.apache.hive.service.cli.operation.SQLOperationDisplay;
 <%args>
 SQLOperationDisplay sod;
 
-   
+   

Stage Id
Status
@@ -218,7 +218,7 @@ org.apache.hive.service.cli.operation.SQLOperationDisplay;
 
 
   Compile-time metadata operations
-
+
   
  Call Name
  Time (ms)
@@ -237,7 +237,7 @@ org.apache.hive.service.cli.operation.SQLOperationDisplay;
 
 
   Execution-time metadata operations
-
+
   
  Call Name
  Time (ms)
@@ -256,7 +256,7 @@ org.apache.hive.service.cli.operation.SQLOperationDisplay;
 
 
   Compile-Time Perf-Logger
-
+
   
  Compile-time Call Name
  Time (ms)
@@ -275,7 +275,7 @@ org.apache.hive.service.cli.operation.SQLOperationDisplay;
 
 
   Execution-Time Perf-Logger
-
+
   
  Execution-time Call Name
  Time (ms)

http://git-wip-us.apache.org/repos/asf/hive/blob/5ab03cba/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp
--
diff --git a/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp 
b/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp
index 33797ed..0e0803b 100644
--- a/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp
+++ b/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp
@@ -98,7 +98,7 @@ if (sessionManager != null) {
 
 
 Active Sessions
-
+
 
 User Name
 IP Address
@@ -128,7 +128,7 @@ for (HiveSession hiveSession: hiveSessions) {
 
 
 Open Queries
-
+
 
 User Name
 Query
@@ -169,7 +169,7 @@ for (HiveSession hiveSession: hiveSessions) {
 
 
 Last Max <%= 
conf.get(ConfVars.HIVE_SERVER2_WEBUI_MAX_HISTORIC_QUERIES.varname) %> Closed 
Queries
-
+
 
 User Name
 Query
@@ -213,7 +213,7 @@ for (HiveSession hiveSession: hiveSessions) {
 
 
 Software Attributes
-
+
 
 Attribute Name
 Value



[2/2] hive git commit: HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)

2017-05-01 Thread xuefu
HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/79e3c5a8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/79e3c5a8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/79e3c5a8

Branch: refs/heads/master
Commit: 79e3c5a8d10e60ae1a981e74b0c48011d3fb2cdc
Parents: 62fbdd8
Author: Xuefu Zhang <xu...@uber.com>
Authored: Mon May 1 18:16:27 2017 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Mon May 1 18:16:27 2017 -0700

--
 .../hive/common/jsonexplain/DagJsonParser.java  |  167 +
 .../common/jsonexplain/JsonParserFactory.java   |4 +
 .../jsonexplain/spark/SparkJsonParser.java  |   35 +
 .../hive/common/jsonexplain/tez/Connection.java |2 +-
 .../hadoop/hive/common/jsonexplain/tez/Op.java  |   54 +-
 .../hive/common/jsonexplain/tez/Printer.java|2 +-
 .../hive/common/jsonexplain/tez/Stage.java  |   20 +-
 .../common/jsonexplain/tez/TezJsonParser.java   |  153 +-
 .../jsonexplain/tez/TezJsonParserUtils.java |6 +-
 .../hive/common/jsonexplain/tez/Vertex.java |   87 +-
 .../org/apache/hadoop/hive/conf/HiveConf.java   |5 +-
 .../test/resources/testconfiguration.properties |1 +
 .../hadoop/hive/ql/optimizer/Optimizer.java |2 +-
 .../hive/ql/parse/ExplainSemanticAnalyzer.java  |   16 +-
 .../apache/hadoop/hive/ql/plan/SparkWork.java   |   10 +-
 .../clientpositive/spark_explainuser_1.q|  671 ++
 .../spark/spark_explainuser_1.q.out | 5921 ++
 17 files changed, 6924 insertions(+), 232 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/79e3c5a8/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java
--
diff --git 
a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java 
b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java
new file mode 100644
index 000..1f01685
--- /dev/null
+++ 
b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java
@@ -0,0 +1,167 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.jsonexplain;
+
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.hadoop.hive.common.jsonexplain.JsonParser;
+import org.json.JSONObject;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public abstract class DagJsonParser implements JsonParser {
+  public final Map<String, Stage> stages = new LinkedHashMap<>();
+  protected final Logger LOG;
+  // the objects that have been printed.
+  public final Set<Object> printSet = new LinkedHashSet<>();
+  // the vertex that should be inlined. <Operator, list of Vertex that is
+  // inlined>
+  public final Map<Op, List<Connection>> inlineMap = new LinkedHashMap<>();
+
+  public DagJsonParser() {
+super();
+LOG = LoggerFactory.getLogger(this.getClass().getName());
+  }
+
+  public void extractStagesAndPlans(JSONObject inputObject) throws Exception {
+// extract stages
+JSONObject dependency = inputObject.getJSONObject("STAGE DEPENDENCIES");
+if (dependency != null && dependency.length() > 0) {
+  // iterate for the first time to get all the names of stages.
+  for (String stageName : JSONObject.getNames(dependency)) {
+this.stages.put(stageName, new Stage(stageName, this));
+  }
+  // iterate for the second time to get all the dependency.
+  for (String stageName : JSONObject.getNames(dependency)) {
+JSONObject dependentStageNames = dependency.getJSONObject(stageName);
+this.stages.get(stageName).addDependency(dependentStageNames, 
this.stages);
+  }
+}
+// extract stage plans
+JSON

[1/2] hive git commit: HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)

2017-05-01 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 62fbdd86e -> 79e3c5a8d


http://git-wip-us.apache.org/repos/asf/hive/blob/79e3c5a8/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out 
b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
new file mode 100644
index 000..ca0910a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
@@ -0,0 +1,5921 @@
+PREHOOK: query: explain create table src_orc_merge_test_part(key int, value 
string) partitioned by (ds string, ts string) stored as orc
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: explain create table src_orc_merge_test_part(key int, value 
string) partitioned by (ds string, ts string) stored as orc
+POSTHOOK: type: CREATETABLE
+Stage-0
+  Create Table Operator:
+name:default.src_orc_merge_test_part
+
+PREHOOK: query: create table src_orc_merge_test_part(key int, value string) 
partitioned by (ds string, ts string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_orc_merge_test_part
+POSTHOOK: query: create table src_orc_merge_test_part(key int, value string) 
partitioned by (ds string, ts string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_orc_merge_test_part
+PREHOOK: query: alter table src_orc_merge_test_part add partition 
(ds='2012-01-03', ts='2012-01-03+14:46:31')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@src_orc_merge_test_part
+POSTHOOK: query: alter table src_orc_merge_test_part add partition 
(ds='2012-01-03', ts='2012-01-03+14:46:31')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@src_orc_merge_test_part
+POSTHOOK: Output: 
default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31
+PREHOOK: query: desc extended src_orc_merge_test_part partition 
(ds='2012-01-03', ts='2012-01-03+14:46:31')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src_orc_merge_test_part
+POSTHOOK: query: desc extended src_orc_merge_test_part partition 
(ds='2012-01-03', ts='2012-01-03+14:46:31')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src_orc_merge_test_part
+key  int 
+value  string  
+ds string  
+ts string  
+
+# Partition Information 
+# col_name data_type   comment 
+
+ds string  
+ts string  
+
+ A masked pattern was here 
+PREHOOK: query: explain insert overwrite table src_orc_merge_test_part 
partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src
+PREHOOK: type: QUERY
+POSTHOOK: query: explain insert overwrite table src_orc_merge_test_part 
partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Stage-2
+  Stats-Aggr Operator
+Stage-0
+  Move Operator
+table:{"name:":"default.src_orc_merge_test_part"}
+Stage-1
+  Map 1
+  File Output Operator [FS_3]
+table:{"name:":"default.src_orc_merge_test_part"}
+Select Operator [SEL_1] (rows=500 width=10)
+  Output:["_col0","_col1"]
+  TableScan [TS_0] (rows=500 width=10)
+default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+
+PREHOOK: query: insert overwrite table src_orc_merge_test_part 
partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: 
default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31
+POSTHOOK: query: insert overwrite table src_orc_merge_test_part 
partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: 
default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31
+POSTHOOK: Lineage: src_orc_merge_test_part 
PARTITION(ds=2012-01-03,ts=2012-01-03+14:46:31).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_orc_merge_test_part 
PARTITION(ds=2012-01-03,ts=2012-01-03+14:46:31).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain insert into table src_orc_merge_test_part 
partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain insert into table src_orc_merge_test_part 
partition(ds='2012-01-03', 

hive git commit: HIVE-12614: RESET command does not close spark session (Sahil via Xuefu)

2017-04-25 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master fb3df4641 -> 1ed36f042


HIVE-12614: RESET command does not close spark session (Sahil via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1ed36f04
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1ed36f04
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1ed36f04

Branch: refs/heads/master
Commit: 1ed36f0428d53303d02ddd5c8a3a6c7f8db9e19a
Parents: fb3df46
Author: Xuefu Zhang <xu...@uber.com>
Authored: Tue Apr 25 14:09:35 2017 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Tue Apr 25 14:10:39 2017 -0700

--
 .../hive/ql/processors/ResetProcessor.java  | 21 ---
 .../hadoop/hive/ql/processors/SetProcessor.java | 15 +++--
 .../hive/ql/processors/TestResetProcessor.java  | 59 
 3 files changed, 82 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/1ed36f04/ql/src/java/org/apache/hadoop/hive/ql/processors/ResetProcessor.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/processors/ResetProcessor.java 
b/ql/src/java/org/apache/hadoop/hive/ql/processors/ResetProcessor.java
index bbd4501..b40879d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/processors/ResetProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/processors/ResetProcessor.java
@@ -23,7 +23,11 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
+
 import org.apache.commons.lang3.StringUtils;
+
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveVariableSource;
 import org.apache.hadoop.hive.conf.SystemVariables;
@@ -33,7 +37,6 @@ import org.apache.hadoop.hive.ql.metadata.Hive;
 import 
org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType;
 import org.apache.hadoop.hive.ql.session.SessionState;
 
-import com.google.common.collect.Lists;
 
 public class ResetProcessor implements CommandProcessor {
 
@@ -45,8 +48,11 @@ public class ResetProcessor implements CommandProcessor {
 
   @Override
   public CommandProcessorResponse run(String command) throws 
CommandNeedRetryException {
-SessionState ss = SessionState.get();
+return run(SessionState.get(), command);
+  }
 
+  @VisibleForTesting
+  CommandProcessorResponse run(SessionState ss, String command) throws 
CommandNeedRetryException {
 CommandProcessorResponse authErrResp =
 CommandUtil.authorizeCommand(ss, HiveOperationType.RESET, 
Arrays.asList(command));
 if (authErrResp != null) {
@@ -88,7 +94,7 @@ public class ResetProcessor implements CommandProcessor {
 ? Lists.newArrayList("Resetting " + message + " to default values") : 
null);
   }
 
-  private void resetOverridesOnly(SessionState ss) {
+  private static void resetOverridesOnly(SessionState ss) {
 if (ss.getOverriddenConfigurations().isEmpty()) return;
 HiveConf conf = new HiveConf();
 for (String key : ss.getOverriddenConfigurations().keySet()) {
@@ -97,21 +103,20 @@ public class ResetProcessor implements CommandProcessor {
 ss.getOverriddenConfigurations().clear();
   }
 
-  private void resetOverrideOnly(SessionState ss, String varname) {
+  private static void resetOverrideOnly(SessionState ss, String varname) {
 if (!ss.getOverriddenConfigurations().containsKey(varname)) return;
 setSessionVariableFromConf(ss, varname, new HiveConf());
 ss.getOverriddenConfigurations().remove(varname);
   }
 
-  private void setSessionVariableFromConf(SessionState ss, String varname,
-  HiveConf conf) {
+  private static void setSessionVariableFromConf(SessionState ss, String 
varname, HiveConf conf) {
 String value = conf.get(varname);
 if (value != null) {
-  ss.getConf().set(varname, value);
+  SetProcessor.setConf(ss, varname, varname, value, false);
 }
   }
 
-  private CommandProcessorResponse resetToDefault(SessionState ss, String 
varname) {
+  private static CommandProcessorResponse resetToDefault(SessionState ss, 
String varname) {
 varname = varname.trim();
 try {
   String nonErrorMessage = null;

http://git-wip-us.apache.org/repos/asf/hive/blob/1ed36f04/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java 
b/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java
index 0ffa182..1458211 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java
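
(The SetProcessor hunk is truncated in the archive.) The ResetProcessor side shows the
testability refactor clearly: the public run(String) now delegates to a package-private,
@VisibleForTesting run(SessionState, String), so a test can pass a mocked session instead of
depending on the SessionState.get() thread-local, and the helpers become static because they no
longer touch instance state. The seam in generic form (all names here are illustrative, not
Hive's):

import com.google.common.annotations.VisibleForTesting;

class CommandSketch {
  interface Session { String name(); } // stand-in for SessionState

  public String run(String command) {
    return run(currentSession(), command); // production entry point
  }

  @VisibleForTesting
  String run(Session session, String command) {
    // all real work happens here, reachable from tests with a stub Session
    return session.name() + ": " + command;
  }

  private Session currentSession() {
    return () -> "default"; // stands in for the thread-local lookup
  }
}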

hive git commit: HIVE-16430: Add log to show the cancelled query id when cancelOperation is called (Zhihai via Xuefu)

2017-04-14 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 794cfa35a -> 4deefcd50


HIVE-16430: Add log to show the cancelled query id when cancelOperation is 
called (Zhihai via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4deefcd5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4deefcd5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4deefcd5

Branch: refs/heads/master
Commit: 4deefcd50433c29dcf7cb4d1e422097b9cfcbca1
Parents: 794cfa3
Author: Xuefu Zhang <xu...@uber.com>
Authored: Fri Apr 14 06:59:46 2017 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Fri Apr 14 06:59:46 2017 -0700

--
 .../apache/hive/service/cli/operation/SQLOperation.java | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/4deefcd5/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java
--
diff --git 
a/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java 
b/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java
index 04fc0a1..d9bfba87 100644
--- a/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java
+++ b/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java
@@ -398,9 +398,11 @@ public class SQLOperation extends 
ExecuteStatementOperation {
   Future<?> backgroundHandle = getBackgroundHandle();
   if (backgroundHandle != null) {
 boolean success = backgroundHandle.cancel(true);
+String queryId = 
confOverlay.get(HiveConf.ConfVars.HIVEQUERYID.varname);
 if (success) {
-  String queryId = 
confOverlay.get(HiveConf.ConfVars.HIVEQUERYID.varname);
   LOG.info("The running operation has been successfully interrupted: " 
+ queryId);
+} else if (state == OperationState.CANCELED) {
+  LOG.info("The running operation could not be cancelled, typically 
because it has already completed normally: " + queryId);
 }
   }
 }
@@ -427,8 +429,16 @@ public class SQLOperation extends 
ExecuteStatementOperation {
 
   @Override
   public void cancel(OperationState stateAfterCancel) throws HiveSQLException {
+String queryId = null;
+if (stateAfterCancel == OperationState.CANCELED) {
+  queryId = confOverlay.get(HiveConf.ConfVars.HIVEQUERYID.varname);
+  LOG.info("Cancelling the query execution: " + queryId);
+}
 cleanup(stateAfterCancel);
 cleanupOperationLog();
+if (stateAfterCancel == OperationState.CANCELED) {
+  LOG.info("Successfully cancelled the query: " + queryId);
+}
   }
 
   @Override
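
The new `else if (state == OperationState.CANCELED)` branch exists because Future.cancel(true)
returns false once the task has already completed, and that case deserved a log line instead of
silence. Standard java.util.concurrent behavior, shown in isolation:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class CancelAfterCompletion {
  public static void main(String[] args) throws Exception {
    ExecutorService pool = Executors.newSingleThreadExecutor();
    Future<?> task = pool.submit(() -> { });
    task.get(); // wait for normal completion
    // A completed task cannot be cancelled; cancel(true) returns false,
    // which is exactly the case the new log message reports.
    System.out.println("cancelled? " + task.cancel(true)); // prints false
    pool.shutdown();
  }
}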



hive git commit: HIVE-16286: Log canceled query id (Jimmy via Xuefu)

2017-03-23 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 736d2e861 -> 8fa9d5833


HIVE-16286: Log canceled query id (Jimmy via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8fa9d583
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8fa9d583
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8fa9d583

Branch: refs/heads/master
Commit: 8fa9d5833c5c4fda092a917c9881db8fbfea5332
Parents: 736d2e8
Author: Xuefu Zhang <xu...@uber.com>
Authored: Thu Mar 23 19:36:16 2017 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Thu Mar 23 19:36:16 2017 -0700

--
 .../org/apache/hive/service/cli/operation/SQLOperation.java| 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/8fa9d583/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java
--
diff --git 
a/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java 
b/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java
index ff389ac..f41092e 100644
--- a/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java
+++ b/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java
@@ -172,8 +172,9 @@ public class SQLOperation extends ExecuteStatementOperation 
{
   @Override
   public void run() {
 try {
+  String queryId = 
confOverlay.get(HiveConf.ConfVars.HIVEQUERYID.varname);
   LOG.info("Query timed out after: " + queryTimeout
-  + " seconds. Cancelling the execution now.");
+  + " seconds. Cancelling the execution now: " + queryId);
   SQLOperation.this.cancel(OperationState.TIMEDOUT);
 } catch (HiveSQLException e) {
   LOG.error("Error cancelling the query after timeout: " + 
queryTimeout + " seconds", e);
@@ -412,7 +413,8 @@ public class SQLOperation extends ExecuteStatementOperation 
{
   if (backgroundHandle != null) {
 boolean success = backgroundHandle.cancel(true);
 if (success) {
-  LOG.info("The running operation has been successfully interrupted.");
+  String queryId = 
confOverlay.get(HiveConf.ConfVars.HIVEQUERYID.varname);
+  LOG.info("The running operation has been successfully interrupted: " 
+ queryId);
 }
   }
 }



[1/2] hive git commit: HIVE-16183: Fix potential thread safety issues with static variables (reviewed by rui, sergey, and peter)

2017-03-16 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 1f7e26ff2 -> 71f4930d9


http://git-wip-us.apache.org/repos/asf/hive/blob/71f4930d/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
--
diff --git 
a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
 
b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
index 7a565dd..f733c1e 100644
--- 
a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
+++ 
b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
@@ -145,7 +145,6 @@ public class FastHiveDecimalImpl extends FastHiveDecimal {
* Int: 8 decimal digits.  An even number and 1/2 of MAX_LONGWORD_DECIMAL.
*/
   private static final int INTWORD_DECIMAL_DIGITS = 8;
-  private static final int MAX_INTWORD_DECIMAL = (int) 
powerOfTenTable[INTWORD_DECIMAL_DIGITS] - 1;
   private static final int MULTIPLER_INTWORD_DECIMAL = (int) 
powerOfTenTable[INTWORD_DECIMAL_DIGITS];
 
   /**
@@ -164,9 +163,6 @@ public class FastHiveDecimalImpl extends FastHiveDecimal {
   private static final long MAX_HIGHWORD_DECIMAL =
   powerOfTenTable[HIGHWORD_DECIMAL_DIGITS] - 1;
 
-  private static long HIGHWORD_DIVIDE_FACTOR = 
powerOfTenTable[LONGWORD_DECIMAL_DIGITS - HIGHWORD_DECIMAL_DIGITS];
-  private static long HIGHWORD_MULTIPLY_FACTOR = 
powerOfTenTable[HIGHWORD_DECIMAL_DIGITS];
-
   // 38 * 2 or 76 full decimal maximum - (64 + 8) digits in 4 lower longs (4 
digits here).
   private static final long FULL_MAX_HIGHWORD_DECIMAL =
   powerOfTenTable[MAX_DECIMAL_DIGITS * 2 - (FOUR_X_LONGWORD_DECIMAL_DIGITS 
+ INTWORD_DECIMAL_DIGITS)] - 1;
@@ -189,11 +185,6 @@ public class FastHiveDecimalImpl extends FastHiveDecimal {
   BigInteger.ONE.add(BIG_INTEGER_MAX_LONGWORD_DECIMAL);
   private static final BigInteger BIG_INTEGER_LONGWORD_MULTIPLIER_2X =
   
BIG_INTEGER_LONGWORD_MULTIPLIER.multiply(BIG_INTEGER_LONGWORD_MULTIPLIER);
-  private static final BigInteger BIG_INTEGER_LONGWORD_MULTIPLIER_3X =
-  
BIG_INTEGER_LONGWORD_MULTIPLIER_2X.multiply(BIG_INTEGER_LONGWORD_MULTIPLIER);
-  private static final BigInteger BIG_INTEGER_LONGWORD_MULTIPLIER_4X =
-  
BIG_INTEGER_LONGWORD_MULTIPLIER_3X.multiply(BIG_INTEGER_LONGWORD_MULTIPLIER);
-
   private static final BigInteger BIG_INTEGER_MAX_HIGHWORD_DECIMAL =
   BigInteger.valueOf(MAX_HIGHWORD_DECIMAL);
   private static final BigInteger BIG_INTEGER_HIGHWORD_MULTIPLIER =
@@ -203,21 +194,21 @@ public class FastHiveDecimalImpl extends FastHiveDecimal {
   // conversion.
 
   // There is only one blank in UTF-8.
-  private final static byte BYTE_BLANK = (byte) ' ';
+  private static final byte BYTE_BLANK = (byte) ' ';
 
-  private final static byte BYTE_DIGIT_ZERO = (byte) '0';
-  private final static byte BYTE_DIGIT_NINE = (byte) '9';
+  private static final byte BYTE_DIGIT_ZERO = (byte) '0';
+  private static final byte BYTE_DIGIT_NINE = (byte) '9';
 
   // Decimal point.
-  private final static byte BYTE_DOT = (byte) '.';
+  private static final byte BYTE_DOT = (byte) '.';
 
   // Sign.
-  private final static byte BYTE_MINUS = (byte) '-';
-  private final static byte BYTE_PLUS = (byte) '+';
+  private static final byte BYTE_MINUS = (byte) '-';
+  private static final byte BYTE_PLUS = (byte) '+';
 
   // Exponent E or e.
-  private final static byte BYTE_EXPONENT_LOWER = (byte) 'e';
-  private final static byte BYTE_EXPONENT_UPPER = (byte) 'E';
+  private static final byte BYTE_EXPONENT_LOWER = (byte) 'e';
+  private static final byte BYTE_EXPONENT_UPPER = (byte) 'E';
 
   
//
   // Initialize (fastSetFrom*).
@@ -1758,7 +1749,7 @@ public class FastHiveDecimalImpl extends FastHiveDecimal {
*  4,611,686,018,427,387,904 or
*  461,1686018427387904 (16 digit comma'd)
*/
-  private static FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_62 =
+  private static final FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_62 =
   new FastHiveDecimal(1, 1686018427387904L, 461L, 0, 19, 0);
 
   /*
@@ -1769,7 +1760,7 @@ public class FastHiveDecimalImpl extends FastHiveDecimal {
*  9,223,372,036,854,775,808 or
*  922,3372036854775808 (16 digit comma'd)
*/
-  private static FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_63 =
+  private static final FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_63 =
   new FastHiveDecimal(1, 3372036854775808L, 922L, 0, 19, 0);
 
   /*
@@ -1784,7 +1775,7 @@ public class FastHiveDecimalImpl extends FastHiveDecimal {
*  42,535,295,865,117,307,932,921,825,928,971,026,432 or
*  425352,9586511730793292,1825928971026432  (16 digit comma'd)
*/
-  private static FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_125 =
+  private static final FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_125 =
   new 
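
(Truncated above.) The recurring fix in this patch is mechanical but not cosmetic: a non-final
static holding a lazily assigned reference has no safe-publication guarantee under the Java
Memory Model, while a static final field initialized in the class initializer does, and final
also rules out accidental reassignment from racing threads. A generic constants-holder sketch of
the "after" state (BigInteger stands in for FastHiveDecimal here):

import java.math.BigInteger;

final class PowerConstants {
  // static final fields are safely published when the class initializes;
  // a plain static offers no such guarantee if anything ever reassigns it.
  static final BigInteger TWO_POWER_62 = BigInteger.ONE.shiftLeft(62);
  static final BigInteger TWO_POWER_63 = BigInteger.ONE.shiftLeft(63);

  private PowerConstants() { } // constants holder, not instantiable
}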

[2/2] hive git commit: HIVE-16183: Fix potential thread safety issues with static variables (reviewed by rui, sergey, and peter)

2017-03-16 Thread xuefu
HIVE-16183: Fix potential thread safety issues with static variables (reviewed 
by rui, sergey, and peter)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/71f4930d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/71f4930d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/71f4930d

Branch: refs/heads/master
Commit: 71f4930d95475e7e63b5acc55af3809aefcc71e0
Parents: 1f7e26f
Author: Xuefu Zhang <xu...@uber.com>
Authored: Thu Mar 16 19:20:41 2017 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Thu Mar 16 19:20:41 2017 -0700

--
 .../org/apache/hive/beeline/BeeLineOpts.java|  2 +-
 .../apache/hive/beeline/HiveSchemaHelper.java   | 12 ++--
 .../org/apache/hive/beeline/HiveSchemaTool.java |  2 +-
 .../org/apache/hadoop/hive/cli/RCFileCat.java   | 13 ++--
 .../apache/hadoop/hive/cli/TestRCFileCat.java   |  4 +-
 .../org/apache/hadoop/hive/common/LogUtils.java |  5 +-
 .../hadoop/hive/common/StatsSetupConst.java |  2 +-
 .../hive/metastore/TestMetastoreVersion.java|  4 +-
 .../hive/metastore/MetaStoreSchemaInfo.java | 16 ++---
 .../hadoop/hive/ql/exec/ArchiveUtils.java   |  7 +-
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |  2 +-
 .../apache/hadoop/hive/ql/exec/Utilities.java   | 21 +++---
 .../ql/exec/vector/VectorizationContext.java|  2 +-
 .../exec/vector/expressions/CuckooSetBytes.java |  4 +-
 .../fast/VectorMapJoinFastHashTable.java|  6 +-
 .../hadoop/hive/ql/history/HiveHistoryImpl.java |  5 +-
 .../apache/hadoop/hive/ql/index/HiveIndex.java  |  4 +-
 .../hadoop/hive/ql/io/HiveFileFormatUtils.java  |  2 +-
 .../hadoop/hive/ql/io/HiveInputFormat.java  |  3 +-
 .../org/apache/hadoop/hive/ql/io/RCFile.java|  2 +-
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   |  6 +-
 .../ql/io/rcfile/stats/PartialScanTask.java |  6 +-
 .../hadoop/hive/ql/metadata/VirtualColumn.java  |  2 +-
 .../hive/ql/optimizer/GenMapRedUtils.java   |  6 +-
 .../ListBucketingPrunerUtils.java   |  4 +-
 .../physical/GenMRSkewJoinProcessor.java| 10 +--
 .../hive/ql/optimizer/physical/Vectorizer.java  | 42 +--
 .../ql/optimizer/physical/VectorizerReason.java |  2 +-
 .../hive/ql/parse/BaseSemanticAnalyzer.java |  8 +--
 .../hive/ql/parse/DDLSemanticAnalyzer.java  |  2 +-
 .../hadoop/hive/ql/parse/WindowingSpec.java |  2 +-
 .../hadoop/hive/ql/plan/AbstractVectorDesc.java |  4 +-
 .../apache/hadoop/hive/ql/plan/GroupByDesc.java |  2 +-
 .../apache/hadoop/hive/ql/plan/MapJoinDesc.java |  6 +-
 .../hadoop/hive/ql/plan/ReduceSinkDesc.java |  2 +-
 .../hive/ql/plan/VectorAppMasterEventDesc.java  |  2 +-
 .../hadoop/hive/ql/plan/VectorFileSinkDesc.java |  2 +-
 .../hadoop/hive/ql/plan/VectorFilterDesc.java   |  2 +-
 .../hadoop/hive/ql/plan/VectorGroupByDesc.java  |  2 +-
 .../hadoop/hive/ql/plan/VectorLimitDesc.java|  2 +-
 .../hadoop/hive/ql/plan/VectorMapJoinDesc.java  |  2 +-
 .../hadoop/hive/ql/plan/VectorMapJoinInfo.java  |  2 +-
 .../hive/ql/plan/VectorPartitionDesc.java   |  2 +-
 .../hive/ql/plan/VectorReduceSinkDesc.java  |  2 +-
 .../hive/ql/plan/VectorReduceSinkInfo.java  |  2 +-
 .../hadoop/hive/ql/plan/VectorSMBJoinDesc.java  |  2 +-
 .../hadoop/hive/ql/plan/VectorSelectDesc.java   |  2 +-
 .../ql/plan/VectorSparkHashTableSinkDesc.java   |  2 +-
 .../VectorSparkPartitionPruningSinkDesc.java|  2 +-
 .../hive/ql/plan/VectorTableScanDesc.java   |  2 +-
 .../hadoop/hive/ql/processors/HiveCommand.java  |  2 +-
 .../udf/generic/GenericUDFInternalInterval.java |  4 +-
 .../mapjoin/fast/CheckFastRowHashMap.java   | 17 ++---
 .../mapjoin/fast/CommonFastHashTable.java   |  4 +-
 .../apache/hadoop/hive/serde2/SerDeUtils.java   |  2 +-
 .../hive/serde2/avro/AvroDeserializer.java  |  2 +-
 .../lazy/fast/LazySimpleDeserializeRead.java|  4 +-
 .../hive/serde2/lazy/fast/StringToDouble.java   |  4 +-
 .../hive/serde2/lazybinary/LazyBinaryUtils.java |  2 +-
 .../hive/serde2/typeinfo/TypeInfoUtils.java |  2 +-
 .../org/apache/hadoop/hive/io/HdfsUtils.java|  4 +-
 .../hive/io/HiveIOExceptionHandlerChain.java|  2 +-
 .../hive/io/HiveIOExceptionHandlerUtil.java |  4 +-
 .../apache/hadoop/hive/shims/ShimLoader.java|  2 +-
 .../hive/common/type/FastHiveDecimalImpl.java   | 73 +---
 .../hadoop/hive/common/type/RandomTypeUtil.java | 10 +--
 .../hive/testutils/jdbc/HiveBurnInClient.java   |  4 +-
 67 files changed, 151 insertions(+), 243 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/71f4930d/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java
--
diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java 
b/beeline/src/java/org/apache/hive/beeline/BeeL

hive git commit: HIVE-16196: UDFJson having thread-safety issues (reviewed by Chao)

2017-03-16 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 0e62d3dcb -> 87be4b31c


HIVE-16196: UDFJson having thread-safety issues (reviewed by Chao)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/87be4b31
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/87be4b31
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/87be4b31

Branch: refs/heads/master
Commit: 87be4b31ce5abbe03ee8461a437c901b5ee9ed05
Parents: 0e62d3d
Author: Xuefu Zhang <xu...@uber.com>
Authored: Thu Mar 16 13:27:53 2017 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Thu Mar 16 13:27:53 2017 -0700

--
 .../org/apache/hadoop/hive/ql/udf/UDFJson.java  | 67 
 1 file changed, 41 insertions(+), 26 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/87be4b31/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
index 2c42fae..0c54754 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
@@ -59,20 +59,14 @@ import org.codehaus.jackson.type.JavaType;
 + "  [,] : Union operator\n"
 + "  [start:end:step] : array slice operator\n")
 public class UDFJson extends UDF {
-  private final Pattern patternKey = 
Pattern.compile("^([a-zA-Z0-9_\\-\\:\\s]+).*");
-  private final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]");
-
-  private static final JsonFactory JSON_FACTORY = new JsonFactory();
-  static {
-// Allows for unescaped ASCII control characters in JSON values
-JSON_FACTORY.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
-// Enabled to accept quoting of all character backslash qooting mechanism
-JSON_FACTORY.enable(Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER);
-  }
-  private static final ObjectMapper MAPPER = new ObjectMapper(JSON_FACTORY);
+  private static final Pattern patternKey = 
Pattern.compile("^([a-zA-Z0-9_\\-\\:\\s]+).*");
+  private static final Pattern patternIndex = 
Pattern.compile("\\[([0-9]+|\\*)\\]");
   private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class);
   private static final JavaType LIST_TYPE = TypeFactory.fromClass(List.class);
 
+  private final JsonFactory jsonFactory = new JsonFactory();
+  private final ObjectMapper objectMapper = new ObjectMapper(jsonFactory);
+
   // An LRU cache using a linked hash map
   static class HashCache<K, V> extends LinkedHashMap<K, V> {
 
@@ -93,16 +87,18 @@ public class UDFJson extends UDF {
 
   }
 
-  static Map<String, Object> extractObjectCache = new HashCache<String, 
Object>();
-  static Map<String, String[]> pathExprCache = new HashCache<String, 
String[]>();
-  static Map<String, ArrayList> indexListCache =
+  Map<String, Object> extractObjectCache = new HashCache<String, Object>();
+  Map<String, String[]> pathExprCache = new HashCache<String, String[]>();
+  Map<String, ArrayList> indexListCache =
   new HashCache<String, ArrayList>();
-  static Map<String, String> mKeyGroup1Cache = new HashCache<String, String>();
-  static Map<String, Boolean> mKeyMatchesCache = new HashCache<String, 
Boolean>();
-
-  Text result = new Text();
+  Map<String, String> mKeyGroup1Cache = new HashCache<String, String>();
+  Map<String, Boolean> mKeyMatchesCache = new HashCache<String, Boolean>();
 
   public UDFJson() {
+// Allows for unescaped ASCII control characters in JSON values
+jsonFactory.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
+// Enabled to accept quoting of all character backslash qooting mechanism
+jsonFactory.enable(Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER);
   }
 
   /**
@@ -125,13 +121,13 @@ public class UDFJson extends UDF {
* @return json string or null when an error happens.
*/
   public Text evaluate(String jsonString, String pathString) {
-
 if (jsonString == null || jsonString.isEmpty() || pathString == null
 || pathString.isEmpty() || pathString.charAt(0) != '$') {
   return null;
 }
 
 int pathExprStart = 1;
+boolean unknownType = pathString.equals("$");
 boolean isRootArray = false;
 
 if (pathString.length() > 1) {
@@ -155,23 +151,41 @@ public class UDFJson extends UDF {
 // Cache extractObject
 Object extractObject = extractObjectCache.get(jsonString);
 if (extractObject == null) {
-  JavaType javaType = isRootArray ? LIST_TYPE : MAP_TYPE;
-  try {
-extractObject = MAPPER.rea
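
(Truncated above.) Two things happen in this diff: the compiled Patterns become static final
(Pattern is immutable and safe to share), while the LinkedHashMap-backed caches and the Jackson
factory/mapper become per-instance state, since LinkedHashMap is not thread-safe and the UDF may
run on several threads. The HashCache being reused is the standard LinkedHashMap LRU idiom; a
self-contained version of that idiom, with an arbitrary capacity for illustration:

import java.util.LinkedHashMap;
import java.util.Map;

// Bounded LRU cache: access-order iteration plus removeEldestEntry()
// evicts the least-recently-used entry once capacity is exceeded.
class LruCache<K, V> extends LinkedHashMap<K, V> {
  private final int capacity;

  LruCache(int capacity) {
    super(16, 0.75f, true); // true = access order, the LRU ingredient
    this.capacity = capacity;
  }

  @Override
  protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
    return size() > capacity;
  }
}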

hive git commit: HIVE-16156: FileSinkOperator should delete existing output target when renaming (Reviewed by Sergey)

2017-03-10 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 8dda898ba -> 76b65baa7


HIVE-16156: FileSinkOperator should delete existing output target when renaming 
(Reviewed by Sergey)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/76b65baa
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/76b65baa
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/76b65baa

Branch: refs/heads/master
Commit: 76b65baa7b5f5c0e5c1f99cf0621247f65fb0b00
Parents: 8dda898
Author: Xuefu Zhang <xu...@uber.com>
Authored: Fri Mar 10 22:14:49 2017 -0800
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Fri Mar 10 22:14:49 2017 -0800

--
 .../apache/hadoop/hive/ql/exec/FileSinkOperator.java| 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/76b65baa/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 3bbe92d..a9d03d0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -229,8 +229,18 @@ public class FileSinkOperator extends 
TerminalOperator implements
 }
   }
   if (needToRename && outPaths[idx] != null && 
!fs.rename(outPaths[idx], finalPaths[idx])) {
-throw new HiveException("Unable to rename output from: " +
+FileStatus fileStatus = FileUtils.getFileStatusOrNull(fs, 
finalPaths[idx]);
+if (fileStatus != null) {
+  LOG.warn("Target path " + finalPaths[idx] + " with a size " + 
fileStatus.getLen() + " exists. Trying to delete it.");
+  if (!fs.delete(finalPaths[idx], true)) {
+throw new HiveException("Unable to delete existing target 
output: " + finalPaths[idx]);
+  }
+}
+
+if (!fs.rename(outPaths[idx], finalPaths[idx])) {
+  throw new HiveException("Unable to rename output from: " +
 outPaths[idx] + " to: " + finalPaths[idx]);
+}
   }
   updateProgress();
 } catch (IOException e) {
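
The change above retries the rename after removing a stale target left behind by a previous, possibly failed, attempt. The same idiom can be sketched standalone against the public Hadoop FileSystem API (class and method names here are illustrative):

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class RenameHelper {
  // Rename src onto dst, deleting a stale dst if a plain rename fails.
  static void renameOverwriting(FileSystem fs, Path src, Path dst)
      throws IOException {
    if (fs.rename(src, dst)) {
      return;                            // common case: no conflict
    }
    if (fs.exists(dst) && !fs.delete(dst, true)) {
      throw new IOException("Unable to delete existing target: " + dst);
    }
    if (!fs.rename(src, dst)) {          // retry once with the target cleared
      throw new IOException("Unable to rename " + src + " to " + dst);
    }
  }
}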



hive git commit: HIVE-15671: RPCServer.registerClient() erroneously uses server/client handshake timeout for connection timeout (reviewed by Jimmy)

2017-02-13 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 791066178 -> 401b14ac7


HIVE-15671: RPCServer.registerClient() erroneously uses server/client handshake 
timeout for connection timeout (reviewed by Jimmy)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/401b14ac
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/401b14ac
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/401b14ac

Branch: refs/heads/master
Commit: 401b14ac778ff58cbc5e76f08de002ea4edf3c57
Parents: 7910661
Author: Xuefu Zhang <xu...@uber.com>
Authored: Mon Feb 13 11:08:53 2017 -0800
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Mon Feb 13 11:08:53 2017 -0800

--
 .../src/main/java/org/apache/hive/spark/client/rpc/Rpc.java| 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/401b14ac/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java
--
diff --git 
a/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java 
b/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java
index b2f133b..0489684 100644
--- a/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java
+++ b/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java
@@ -120,7 +120,7 @@ public class Rpc implements Closeable {
   }
 };
 final ScheduledFuture timeoutFuture = eloop.schedule(timeoutTask,
-rpcConf.getServerConnectTimeoutMs(), TimeUnit.MILLISECONDS);
+connectTimeoutMs, TimeUnit.MILLISECONDS);
 
 // The channel listener instantiates the Rpc instance when the connection 
is established,
 // and initiates the SASL handshake.
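
The one-line fix swaps the server/client handshake budget for the client connection budget when arming the timer. A minimal sketch of that wiring, with illustrative names (failConnect stands in for the promise-failing task in Rpc):

import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;

import io.netty.channel.nio.NioEventLoopGroup;

class ConnectTimeoutSketch {
  // Arms a timer that fails the pending connect once the client connect
  // timeout elapses; the (longer) handshake timeout is a separate budget.
  // The caller cancels the returned future once the channel is established.
  static ScheduledFuture<?> armTimeout(NioEventLoopGroup eloop,
      long connectTimeoutMs, Runnable failConnect) {
    return eloop.schedule(failConnect, connectTimeoutMs, TimeUnit.MILLISECONDS);
  }
}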



hive git commit: HIVE-15683: Make what's done in HIVE-15580 for group by configurable (reviewed by Chao)

2017-02-08 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 19a6831b9 -> 6c901fb3e


HIVE-15683: Make what's done in HIVE-15580 for group by configurable (reviewed 
by Chao)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6c901fb3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6c901fb3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6c901fb3

Branch: refs/heads/master
Commit: 6c901fb3e681edb76e3251996b14dac4ae092ce5
Parents: 19a6831
Author: Xuefu Zhang <xu...@uber.com>
Authored: Wed Feb 8 14:58:19 2017 -0800
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Wed Feb 8 14:58:19 2017 -0800

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  4 ++
 .../hive/ql/exec/spark/GroupByShuffler.java | 11 +++--
 .../hive/ql/exec/spark/HiveReduceFunction.java  | 10 ++---
 .../spark/HiveReduceFunctionResultList.java | 18 ++---
 .../hadoop/hive/ql/exec/spark/ReduceTran.java   |  8 ++--
 .../hive/ql/exec/spark/RepartitionShuffler.java | 42 
 .../hive/ql/exec/spark/SortByShuffler.java  |  2 +-
 .../hive/ql/exec/spark/SparkPlanGenerator.java  |  6 ++-
 .../hive/ql/exec/spark/SparkShuffler.java   |  4 +-
 .../clientpositive/lateral_view_explode2.q  |  4 +-
 .../clientpositive/spark/union_remove_25.q.out  |  2 +-
 11 files changed, 85 insertions(+), 26 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/6c901fb3/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index f3b01b2..e82758f 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3228,6 +3228,10 @@ public class HiveConf extends Configuration {
 SPARK_DYNAMIC_PARTITION_PRUNING_MAX_DATA_SIZE(
 "hive.spark.dynamic.partition.pruning.max.data.size", 100*1024*1024L,
 "Maximum total data size in dynamic pruning."),
+SPARK_USE_GROUPBY_SHUFFLE(
+"hive.spark.use.groupby.shuffle", true,
+"Spark groupByKey transformation has better performance but uses 
unbounded memory." +
+"Turn this off when there is a memory issue."),
 NWAYJOINREORDER("hive.reorder.nway.joins", true,
   "Runs reordering of tables within single n-way join (i.e.: picks 
streamtable)"),
 HIVE_MERGE_NWAY_JOINS("hive.merge.nway.joins", true,

http://git-wip-us.apache.org/repos/asf/hive/blob/6c901fb3/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java
index 8267515..9f9e3b2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java
@@ -20,18 +20,17 @@ package org.apache.hadoop.hive.ql.exec.spark;
 
 import org.apache.hadoop.hive.ql.io.HiveKey;
 import org.apache.hadoop.io.BytesWritable;
-import org.apache.spark.HashPartitioner;
 import org.apache.spark.api.java.JavaPairRDD;
 
-public class GroupByShuffler implements SparkShuffler {
+public class GroupByShuffler implements SparkShuffler<Iterable<BytesWritable>> {
 
   @Override
-  public JavaPairRDD<HiveKey, BytesWritable> shuffle(
+  public JavaPairRDD<HiveKey, Iterable<BytesWritable>> shuffle(
   JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
-if (numPartitions < 0) {
-  numPartitions = 1;
+if (numPartitions > 0) {
+  return input.groupByKey(numPartitions);
 }
-return input.repartitionAndSortWithinPartitions(new 
HashPartitioner(numPartitions));
+return input.groupByKey();
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hive/blob/6c901fb3/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java
index 2b85872..2b6e2de 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java
@@ -25,8 +25,8 @@ import org.apache.hadoop.io.BytesWritable;
 
 import scala.Tuple2;
 
-public class HiveReduceFunction extends HivePairFlatMapFunction<
-  Iterator<Tuple2<HiveKey, BytesWritable>>, HiveKey
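
With the flag in place, plan generation can fall back to the bounded-memory shuffle from HIVE-15580 when groupByKey risks running out of memory. A hedged sketch of the selection logic (the class and method names are illustrative; the actual wiring lives in SparkPlanGenerator):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.spark.GroupByShuffler;
import org.apache.hadoop.hive.ql.exec.spark.RepartitionShuffler;
import org.apache.hadoop.hive.ql.exec.spark.SparkShuffler;

class ShufflerChoice {
  static SparkShuffler<?> chooseShuffler(HiveConf conf) {
    if (conf.getBoolVar(HiveConf.ConfVars.SPARK_USE_GROUPBY_SHUFFLE)) {
      return new GroupByShuffler();      // groupByKey: faster, unbounded memory
    }
    return new RepartitionShuffler();    // repartition+sort: bounded memory
  }
}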

hive git commit: HIVE-15682: Eliminate per-row based dummy iterator creation (reviewed by Chao)

2017-02-07 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 3e01ef326 -> 561dbe3b9


HIVE-15682: Eliminate per-row based dummy iterator creation (reviewed by Chao)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/561dbe3b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/561dbe3b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/561dbe3b

Branch: refs/heads/master
Commit: 561dbe3b90bc5cd85a64e22ccd9e384bbf67a782
Parents: 3e01ef3
Author: Xuefu Zhang <xu...@uber.com>
Authored: Tue Feb 7 13:48:55 2017 -0800
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Tue Feb 7 13:48:55 2017 -0800

--
 .../ql/exec/spark/SparkReduceRecordHandler.java | 56 
 1 file changed, 35 insertions(+), 21 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/561dbe3b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
index 44f2e4d..8251900 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
@@ -211,30 +211,44 @@ public class SparkReduceRecordHandler extends 
SparkRecordHandler {
   }
 
   /**
-   * TODO: Instead of creating a dummy iterator per row, we can implement a 
private method that's
-   * similar to processRow(Object key, Iterator values) but processes one 
row at a time. Then,
-   * we just call that private method here.
+   * A reusable dummy iterator that has only one value.
+   *
*/
-  @Override
-  public void processRow(Object key, final Object value) throws IOException {
-processRow(key, new Iterator() {
-  boolean done = false;
-  @Override
-  public boolean hasNext() {
-return !done;
-  }
+  private static class DummyIterator implements Iterator {
+private boolean done = false;
+private Object value = null;
 
-  @Override
-  public Object next() {
-done = true;
-return value;
-  }
+public void setValue(Object v) {
+  this.value = v;
+  done = false;
+}
 
-  @Override
-  public void remove() {
-throw new UnsupportedOperationException("Iterator.remove() is not 
implemented/supported");
-  }
-});
+@Override
+public boolean hasNext() {
+  return !done;
+}
+
+@Override
+public Object next() {
+  done = true;
+  return value;
+}
+
+@Override
+public void remove() {
+  throw new UnsupportedOperationException("Iterator.remove() is not 
implemented/supported");
+}
+  }
+
+  private DummyIterator dummyIterator = new DummyIterator();
+
+  /**
+   * Process one row using a dummy iterator.
+   */
+  @Override
+  public void processRow(Object key, final Object value) throws IOException {
+dummyIterator.setValue(value);
+processRow(key, dummyIterator);
   }
 
   @Override
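
Reusing a single DummyIterator is safe here because SparkReduceRecordHandler processes rows on one thread and processRow(key, values) drains the iterator before setValue() is called for the next row; the payoff is one less object allocation per row on the reduce hot path.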



hive git commit: HIVE-15749: Add missing ASF headers (Peter via Xuefu)

2017-02-01 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 4a03fb1da -> 5c403e9fc


HIVE-15749: Add missing ASF headers (Peter via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5c403e9f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5c403e9f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5c403e9f

Branch: refs/heads/master
Commit: 5c403e9fc0552559914079ca480eba8b856b7ee8
Parents: 4a03fb1
Author: Xuefu Zhang <xu...@uber.com>
Authored: Wed Feb 1 13:51:59 2017 -0800
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Wed Feb 1 13:51:59 2017 -0800

--
 .../hive/common/classification/RetrySemantics.java | 17 +
 .../hadoop/hive/druid/io/DruidRecordWriter.java| 17 +
 .../hive/jdbc/TestHivePreparedStatement.java   | 17 +
 .../hive/llap/io/encoded/LineRrOffsetReader.java   | 17 +
 .../hive/llap/io/encoded/PassThruOffsetReader.java | 17 +
 .../hadoop/hive/ql/parse/TestMergeStatement.java   | 17 +
 .../apache/hadoop/hive/ql/plan/TestMapWork.java| 17 +
 7 files changed, 119 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/5c403e9f/common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java
--
diff --git 
a/common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java
 
b/common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java
index abad45e..5883b01 100644
--- 
a/common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java
+++ 
b/common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.hadoop.hive.common.classification;
 
 

http://git-wip-us.apache.org/repos/asf/hive/blob/5c403e9f/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java
--
diff --git 
a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java 
b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java
index 1601a9a..3323cc0 100644
--- 
a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java
+++ 
b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.hadoop.hive.druid.io;
 
 import com.google.common.base.Function;

http://git-wip-us.apache.org/repos/asf/hive/blob/5c403e9f/jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java
--
diff --git a/jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java 
b/jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java
index bc49aeb..2a68c91 100644
--- a/jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java
+++ b/jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) und

hive git commit: HIVE-15580: Eliminate unbounded memory usage for orderBy and groupBy in Hive on Spark (reviewed by Chao Sun)

2017-01-20 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master f968cf78a -> 811b3e39e


HIVE-15580: Eliminate unbounded memory usage for orderBy and groupBy in Hive on 
Spark (reviewed by Chao Sun)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/811b3e39
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/811b3e39
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/811b3e39

Branch: refs/heads/master
Commit: 811b3e39ed569232c4f138c1287109ef8ebce132
Parents: f968cf7
Author: Xuefu Zhang <xu...@uber.com>
Authored: Fri Jan 20 12:56:49 2017 -0800
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Fri Jan 20 12:56:49 2017 -0800

--
 .../hive/ql/exec/spark/GroupByShuffler.java | 10 +--
 .../hive/ql/exec/spark/HiveReduceFunction.java  |  4 +-
 .../spark/HiveReduceFunctionResultList.java |  8 +--
 .../hadoop/hive/ql/exec/spark/ReduceTran.java   |  4 +-
 .../hadoop/hive/ql/exec/spark/ShuffleTran.java  |  6 +-
 .../hive/ql/exec/spark/SortByShuffler.java  | 65 +---
 .../hive/ql/exec/spark/SparkPlanGenerator.java  |  7 ---
 .../ql/exec/spark/SparkReduceRecordHandler.java | 29 +++--
 .../hive/ql/exec/spark/SparkShuffler.java   |  2 +-
 .../queries/clientpositive/union_top_level.q|  8 +--
 .../clientpositive/llap/union_top_level.q.out   | 52 
 .../spark/lateral_view_explode2.q.out   |  2 +-
 .../clientpositive/spark/union_remove_25.q.out  |  2 +-
 .../clientpositive/spark/union_top_level.q.out  | 62 +--
 .../spark/vector_outer_join5.q.out  | 40 ++--
 15 files changed, 124 insertions(+), 177 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/811b3e39/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java
index e128dd2..8267515 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java
@@ -20,21 +20,23 @@ package org.apache.hadoop.hive.ql.exec.spark;
 
 import org.apache.hadoop.hive.ql.io.HiveKey;
 import org.apache.hadoop.io.BytesWritable;
+import org.apache.spark.HashPartitioner;
 import org.apache.spark.api.java.JavaPairRDD;
 
 public class GroupByShuffler implements SparkShuffler {
 
   @Override
-  public JavaPairRDD<HiveKey, Iterable<BytesWritable>> shuffle(
+  public JavaPairRDD<HiveKey, BytesWritable> shuffle(
   JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
-if (numPartitions > 0) {
-  return input.groupByKey(numPartitions);
+if (numPartitions < 0) {
+  numPartitions = 1;
 }
-return input.groupByKey();
+return input.repartitionAndSortWithinPartitions(new 
HashPartitioner(numPartitions));
   }
 
   @Override
   public String getName() {
 return "GroupBy";
   }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/811b3e39/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java
index eeb4443..2b85872 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java
@@ -26,7 +26,7 @@ import org.apache.hadoop.io.BytesWritable;
 import scala.Tuple2;
 
 public class HiveReduceFunction extends HivePairFlatMapFunction<
-  Iterator<Tuple2<HiveKey, Iterable<BytesWritable>>>, HiveKey, BytesWritable> {
+  Iterator<Tuple2<HiveKey, BytesWritable>>, HiveKey, BytesWritable> {
 
   private static final long serialVersionUID = 1L;
 
@@ -37,7 +37,7 @@ public class HiveReduceFunction extends 
HivePairFlatMapFunction<
   @SuppressWarnings("unchecked")
   @Override
   public Iterator<Tuple2<HiveKey, BytesWritable>>
-  call(Iterator<Tuple2<HiveKey, Iterable<BytesWritable>>> it) throws Exception {
+  call(Iterator<Tuple2<HiveKey, BytesWritable>> it) throws Exception {
 initJobConf();
 
 SparkReduceRecordHandler reducerRecordhandler = new 
SparkReduceRecordHandler();

http://git-wip-us.apache.org/repos/asf/hive/blob/811b3e39/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunctionResultList.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunctionResultList.java
 
b/ql/src/java/org/ap
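
The heart of the patch is visible in GroupByShuffler above: groupByKey materializes every value of a key in memory, while repartitionAndSortWithinPartitions streams each key's values to the reducer as a contiguous sorted run. Side by side against the Spark Java API (a sketch, not Hive's exact code):

import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.io.BytesWritable;
import org.apache.spark.HashPartitioner;
import org.apache.spark.api.java.JavaPairRDD;

class ShuffleStrategies {
  // Materializes each key's values as an in-memory Iterable: fast, but the
  // largest key group must fit in executor memory.
  static JavaPairRDD<HiveKey, Iterable<BytesWritable>> grouped(
      JavaPairRDD<HiveKey, BytesWritable> rdd, int parts) {
    return rdd.groupByKey(parts);
  }

  // Sorts within partitions so the reducer sees each key's values as a
  // streamed run: memory stays bounded.
  static JavaPairRDD<HiveKey, BytesWritable> sortedWithinPartitions(
      JavaPairRDD<HiveKey, BytesWritable> rdd, int parts) {
    return rdd.repartitionAndSortWithinPartitions(new HashPartitioner(parts));
  }
}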

hive git commit: HIVE-15543: Don't try to get memory/cores to decide parallelism when Spark dynamic allocation is enabled (Reviewed by Rui)

2017-01-05 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 9e7d384f6 -> ccc9bf3ea


HIVE-15543: Don't try to get memory/cores to decide parallelism when Spark 
dynamic allocation is enabled (Reviewed by Rui)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ccc9bf3e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ccc9bf3e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ccc9bf3e

Branch: refs/heads/master
Commit: ccc9bf3eaadadcbb3c93faa4a9ccc0e20c41dc28
Parents: 9e7d384
Author: Xuefu Zhang <xu...@uber.com>
Authored: Thu Jan 5 10:56:02 2017 -0800
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Thu Jan 5 10:56:02 2017 -0800

--
 .../spark/SetSparkReducerParallelism.java   | 56 
 1 file changed, 33 insertions(+), 23 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/ccc9bf3e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
index ff4924d..7a5b71f 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
@@ -53,6 +53,8 @@ public class SetSparkReducerParallelism implements 
NodeProcessor {
 
   private static final Logger LOG = 
LoggerFactory.getLogger(SetSparkReducerParallelism.class.getName());
 
+  private static final String SPARK_DYNAMIC_ALLOCATION_ENABLED = 
"spark.dynamicAllocation.enabled";
+
   // Spark memory per task, and total number of cores
   private ObjectPair<Long, Integer> sparkMemoryAndCores;
 
@@ -109,34 +111,12 @@ public class SetSparkReducerParallelism implements 
NodeProcessor {
   }
 }
 
-if (sparkMemoryAndCores == null) {
-  SparkSessionManager sparkSessionManager = null;
-  SparkSession sparkSession = null;
-  try {
-sparkSessionManager = SparkSessionManagerImpl.getInstance();
-sparkSession = SparkUtilities.getSparkSession(
-  context.getConf(), sparkSessionManager);
-sparkMemoryAndCores = sparkSession.getMemoryAndCores();
-  } catch (HiveException e) {
-throw new SemanticException("Failed to get a spark session: " + e);
-  } catch (Exception e) {
-LOG.warn("Failed to get spark memory/core info", e);
-  } finally {
-if (sparkSession != null && sparkSessionManager != null) {
-  try {
-sparkSessionManager.returnSession(sparkSession);
-  } catch (HiveException ex) {
-LOG.error("Failed to return the session to SessionManager: " + 
ex, ex);
-  }
-}
-  }
-}
-
 // Divide it by 2 so that we can have more reducers
 long bytesPerReducer = 
context.getConf().getLongVar(HiveConf.ConfVars.BYTESPERREDUCER) / 2;
 int numReducers = Utilities.estimateReducers(numberOfBytes, 
bytesPerReducer,
 maxReducers, false);
 
+getSparkMemoryAndCores(context);
 if (sparkMemoryAndCores != null &&
 sparkMemoryAndCores.getFirst() > 0 && 
sparkMemoryAndCores.getSecond() > 0) {
   // warn the user if bytes per reducer is much larger than memory per 
task
@@ -184,4 +164,34 @@ public class SetSparkReducerParallelism implements 
NodeProcessor {
 return false;
   }
 
+  private void getSparkMemoryAndCores(OptimizeSparkProcContext context) throws 
SemanticException {
+if (context.getConf().getBoolean(SPARK_DYNAMIC_ALLOCATION_ENABLED, false)) 
{
+  // If dynamic allocation is enabled, numbers for memory and cores are 
meaningless. So, we don't
+  // try to get it.
+  sparkMemoryAndCores = null;
+  return;
+}
+
+SparkSessionManager sparkSessionManager = null;
+SparkSession sparkSession = null;
+try {
+  sparkSessionManager = SparkSessionManagerImpl.getInstance();
+  sparkSession = SparkUtilities.getSparkSession(
+  context.getConf(), sparkSessionManager);
+  sparkMemoryAndCores = sparkSession.getMemoryAndCores();
+} catch (HiveException e) {
+  throw new SemanticException("Failed to get a spark session: " + e);
+} catch (Exception e) {
+  LOG.warn("Failed to get spark memory/core info", e);
+} finally {
+  if (sparkSession != null && sparkSessionManager != null) {
+try {
+  sparkSessionManager.returnSession(sp
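
The extracted helper short-circuits before touching the Spark session at all. The guard itself is trivial; sketched standalone (class and method names are illustrative):

import org.apache.hadoop.conf.Configuration;

class ParallelismGuard {
  private static final String SPARK_DYNAMIC_ALLOCATION_ENABLED =
      "spark.dynamicAllocation.enabled";

  // Executor memory/core totals are meaningless under dynamic allocation,
  // so skip querying the Spark session for them.
  static boolean shouldQueryMemoryAndCores(Configuration conf) {
    return !conf.getBoolean(SPARK_DYNAMIC_ALLOCATION_ENABLED, false);
  }
}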

hive git commit: HIVE-15528: Expose Spark job error in SparkTask (Zhihai via Xuefu)

2017-01-03 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 5d45974e9 -> c928ad3d3


HIVE-15528: Expose Spark job error in SparkTask (Zhihai via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c928ad3d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c928ad3d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c928ad3d

Branch: refs/heads/master
Commit: c928ad3d3f958d1e2e109b689fc5c6e9ee3e619b
Parents: 5d45974
Author: Xuefu Zhang <xu...@uber.com>
Authored: Tue Jan 3 10:39:39 2017 -0800
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Tue Jan 3 10:39:39 2017 -0800

--
 .../org/apache/hadoop/hive/ql/exec/spark/SparkTask.java | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/c928ad3d/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
index f836065..87d80a3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
@@ -114,7 +114,7 @@ public class SparkTask extends Task<SparkWork> {
   this.jobID = jobRef.getSparkJobStatus().getAppID();
   rc = jobRef.monitorJob();
   SparkJobStatus sparkJobStatus = jobRef.getSparkJobStatus();
-  getSparkJobInfo(sparkJobStatus);
+  getSparkJobInfo(sparkJobStatus, rc);
   if (rc == 0) {
 sparkStatistics = sparkJobStatus.getSparkStatistics();
 if (LOG.isInfoEnabled() && sparkStatistics != null) {
@@ -139,6 +139,7 @@ public class SparkTask extends Task {
   // org.apache.commons.lang.StringUtils
   console.printError(msg, "\n" + 
org.apache.hadoop.util.StringUtils.stringifyException(e));
   LOG.error(msg, e);
+  setException(e);
   rc = 1;
 } finally {
   startTime = perfLogger.getEndTime(PerfLogger.SPARK_SUBMIT_TO_RUNNING);
@@ -196,6 +197,7 @@ public class SparkTask extends Task {
 String mesg = "Job Commit failed with exception '"
 + Utilities.getNameMessage(e) + "'";
 console.printError(mesg, "\n" + StringUtils.stringifyException(e));
+setException(e);
   }
 }
 return rc;
@@ -330,7 +332,7 @@ public class SparkTask extends Task {
 return counters;
   }
 
-  private void getSparkJobInfo(SparkJobStatus sparkJobStatus) {
+  private void getSparkJobInfo(SparkJobStatus sparkJobStatus, int rc) {
 try {
  stageIds = new ArrayList<Integer>();
   int[] ids = sparkJobStatus.getStageIds();
@@ -355,6 +357,12 @@ public class SparkTask extends Task {
   succeededTaskCount = sumComplete;
   totalTaskCount = sumTotal;
   failedTaskCount = sumFailed;
+  if (rc != 0) {
+Throwable error = sparkJobStatus.getError();
+if (error != null) {
+  setException(error);
+}
+  }
 } catch (Exception e) {
   LOG.error("Failed to get Spark job information", e);
 }
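
Threading the failure through setException() lets callers such as the Driver surface the real Spark error instead of a bare nonzero return code; note that getSparkJobInfo() only consults sparkJobStatus.getError() when the monitor actually reported a failure (rc != 0).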



hive git commit: HIVE-14617: NPE in UDF MapValues() if input is null (reviewed by Chao)

2016-08-25 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 9343fee5d -> 2f686d4c0


HIVE-14617: NPE in UDF MapValues() if input is null (reviewed by Chao)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2f686d4c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2f686d4c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2f686d4c

Branch: refs/heads/master
Commit: 2f686d4c0c20540079660de202c619e42ed5cd4f
Parents: 9343fee
Author: Xuefu Zhang <xu...@uber.com>
Authored: Thu Aug 25 11:05:25 2016 -0700
Committer: Xuefu Zhang <xu...@uber.com>
Committed: Thu Aug 25 11:05:25 2016 -0700

--
 .../ql/udf/generic/GenericUDFMapValues.java |  6 ++-
 .../ql/udf/generic/TestGenericUDFMapValues.java | 56 
 2 files changed, 61 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/2f686d4c/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java
index 096ceac..3bd5864 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.udf.generic;
 
 import java.util.ArrayList;
+import java.util.Map;
 
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
@@ -61,7 +62,10 @@ public class GenericUDFMapValues extends GenericUDF {
   public Object evaluate(DeferredObject[] arguments) throws HiveException {
 retArray.clear();
 Object mapObj = arguments[0].get();
-retArray.addAll(mapOI.getMap(mapObj).values());
+Map map = mapOI.getMap(mapObj);
+if (map != null) {
+  retArray.addAll(map.values());
+}
 return retArray;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/2f686d4c/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMapValues.java
--
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMapValues.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMapValues.java
new file mode 100644
index 000..44676ed
--- /dev/null
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMapValues.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import 
org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestGenericUDFMapValues {
+
+  @Test
+  public void testNullMap() throws HiveException, IOException {
+ObjectInspector[] inputOIs = {
+ObjectInspectorFactory.getStandardMapObjectInspector(
+PrimitiveObjectInspectorFactory.writableStringObjectInspector,
+PrimitiveObjectInspectorFactory.writableStringObjectInspector),
+};
+
+Map<String, String> input = null;
+DeferredObject[] args = {
+new DeferredJavaObject(input)
+};
+
+  GenericUDFMapValues udf = new GenericUDFMapValues();
+StandardListObjectInspector oi = (StandardListObjectInspector) 
udf.initialize(inputOIs);
+Object res = udf.evaluate(args);
+
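
The archived test is cut off before its assertions. A plausible completion (an assumption, not necessarily the committed code) checks that a null map now yields an empty list instead of a NullPointerException:

// Hypothetical assertions; the committed test may differ.
Assert.assertTrue(res instanceof java.util.List);
Assert.assertTrue(((java.util.List<?>) res).isEmpty());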

svn commit: r1733688 - /hive/cms/trunk/content/people.mdtext

2016-03-04 Thread xuefu
Author: xuefu
Date: Sat Mar  5 04:28:38 2016
New Revision: 1733688

URL: http://svn.apache.org/viewvc?rev=1733688&view=rev
Log:
Update Xuefu's information

Modified:
hive/cms/trunk/content/people.mdtext

Modified: hive/cms/trunk/content/people.mdtext
URL: 
http://svn.apache.org/viewvc/hive/cms/trunk/content/people.mdtext?rev=1733688&r1=1733687&r2=1733688&view=diff
==
--- hive/cms/trunk/content/people.mdtext (original)
+++ hive/cms/trunk/content/people.mdtext Sat Mar  5 04:28:38 2016
@@ -214,9 +214,9 @@ tr:nth-child(2n+1) {
 
 
 
-xuefuz 
+xuefu 
 Xuefu Zhang 
-http://cloudera.com/;>Cloudera 
+ 
 
 
 




hive git commit: HIVE-13101: NullPointerException in HiveLexer.g (Sandeep via Xuefu)

2016-02-24 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master bc4dcf376 -> e9b734852


HIVE-13101: NullPointerException in HiveLexer.g (Sandeep via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e9b73485
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e9b73485
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e9b73485

Branch: refs/heads/master
Commit: e9b73485281730abf73b35d9029000edd42fa35c
Parents: bc4dcf3
Author: Xuefu Zhang <xzh...@cloudera.com>
Authored: Wed Feb 24 15:50:47 2016 -0800
Committer: Xuefu Zhang <xzh...@cloudera.com>
Committed: Wed Feb 24 15:50:47 2016 -0800

--
 ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g | 3 +++
 1 file changed, 3 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/e9b73485/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 4c4470b..3f92d16 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -31,6 +31,9 @@ import org.apache.hadoop.hive.conf.HiveConf;
   }
   
   protected boolean allowQuotedId() {
+if(hiveConf == null){
+  return false;
+}
 String supportedQIds = HiveConf.getVar(hiveConf, 
HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT);
 return !"none".equals(supportedQIds);
   }



hive git commit: HIVE-13045: move guava dependency back to 14 after HIVE-12952 (Mohit via Xuefu)

2016-02-19 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master cc8cec235 -> fd59191be


HIVE-13045: move guava dependency back to 14 after HIVE-12952 (Mohit via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fd59191b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fd59191b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fd59191b

Branch: refs/heads/master
Commit: fd59191be047a980dec704a2a1e764fd22d22936
Parents: cc8cec2
Author: Xuefu Zhang <xzh...@cloudera.com>
Authored: Fri Feb 19 15:02:40 2016 -0800
Committer: Xuefu Zhang <xzh...@cloudera.com>
Committed: Fri Feb 19 15:02:40 2016 -0800

--
 pom.xml| 2 +-
 .../org/apache/hive/service/cli/operation/OperationManager.java| 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/fd59191b/pom.xml
--
diff --git a/pom.xml b/pom.xml
index af2e3d1..836e397 100644
--- a/pom.xml
+++ b/pom.xml
@@ -129,7 +129,7 @@
 1.4
 10.10.2.0
 3.1.0
-15.0
+14.0.1
 2.4.4
 2.6.0
 
${basedir}/${hive.path.to.root}/testutils/hadoop

http://git-wip-us.apache.org/repos/asf/hive/blob/fd59191b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java
--
diff --git 
a/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java 
b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java
index 96c01de..1b8aca9 100644
--- 
a/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java
+++ 
b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java
@@ -28,7 +28,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 
-import com.google.common.collect.EvictingQueue;
 import org.apache.hadoop.hive.common.metrics.common.Metrics;
 import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
 import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;



hive git commit: HIVE-12205: Unify metric collection for local and remote spark client. (Chinna via Chengxiang)

2016-02-17 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master a6d9bf76e -> 9829f9985


HIVE-12205: Unify metric collection for local and remote spark client. (Chinna 
via Chengxiang)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9829f998
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9829f998
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9829f998

Branch: refs/heads/master
Commit: 9829f9985c48742a070b0f09889d8d74d24b5553
Parents: a6d9bf7
Author: chengxiang <chengxi...@apache.com>
Authored: Wed Feb 17 18:36:51 2016 +0800
Committer: Xuefu Zhang <xzh...@cloudera.com>
Committed: Wed Feb 17 06:34:10 2016 -0800

--
 .../spark/status/impl/LocalSparkJobStatus.java  | 94 +++-
 .../spark/status/impl/RemoteSparkJobStatus.java | 35 +---
 .../exec/spark/status/impl/SparkJobUtils.java   | 56 
 .../hive/spark/client/MetricsCollection.java|  2 +-
 4 files changed, 73 insertions(+), 114 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/9829f998/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/LocalSparkJobStatus.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/LocalSparkJobStatus.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/LocalSparkJobStatus.java
index 3c15521..d4819d9 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/LocalSparkJobStatus.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/LocalSparkJobStatus.java
@@ -28,6 +28,8 @@ import 
org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatistics;
 import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticsBuilder;
 import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobStatus;
 import org.apache.hadoop.hive.ql.exec.spark.status.SparkStageProgress;
+import org.apache.hive.spark.client.MetricsCollection;
+import org.apache.hive.spark.client.metrics.Metrics;
 import org.apache.hive.spark.counter.SparkCounters;
 import org.apache.spark.JobExecutionStatus;
 import org.apache.spark.SparkJobInfo;
@@ -135,7 +137,18 @@ public class LocalSparkJobStatus implements SparkJobStatus 
{
   return null;
 }
 
-Map<String, Long> flatJobMetric = combineJobLevelMetrics(jobMetric);
+MetricsCollection metricsCollection = new MetricsCollection();
+Set<String> stageIds = jobMetric.keySet();
+for (String stageId : stageIds) {
+  List<TaskMetrics> taskMetrics = jobMetric.get(stageId);
+  for (TaskMetrics taskMetric : taskMetrics) {
+Metrics metrics = new Metrics(taskMetric);
+metricsCollection.addMetrics(jobId, Integer.parseInt(stageId), 0, 
metrics);
+  }
+}
+SparkJobUtils sparkJobUtils = new SparkJobUtils();
+Map<String, Long> flatJobMetric = 
sparkJobUtils.collectMetrics(metricsCollection
+.getAllMetrics());
 for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) {
   sparkStatisticsBuilder.add(jobIdentifier, entry.getKey(), 
Long.toString(entry.getValue()));
 }
@@ -153,85 +166,6 @@ public class LocalSparkJobStatus implements SparkJobStatus 
{
 }
   }
 
-  private Map<String, Long> combineJobLevelMetrics(Map<String, List<TaskMetrics>> jobMetric) {
-Map<String, Long> results = Maps.newLinkedHashMap();
-
-long executorDeserializeTime = 0;
-long executorRunTime = 0;
-long resultSize = 0;
-long jvmGCTime = 0;
-long resultSerializationTime = 0;
-long memoryBytesSpilled = 0;
-long diskBytesSpilled = 0;
-long bytesRead = 0;
-long remoteBlocksFetched = 0;
-long localBlocksFetched = 0;
-long fetchWaitTime = 0;
-long remoteBytesRead = 0;
-long shuffleBytesWritten = 0;
-long shuffleWriteTime = 0;
-boolean inputMetricExist = false;
-boolean shuffleReadMetricExist = false;
-boolean shuffleWriteMetricExist = false;
-
-for (List<TaskMetrics> stageMetric : jobMetric.values()) {
-  if (stageMetric != null) {
-for (TaskMetrics taskMetrics : stageMetric) {
-  if (taskMetrics != null) {
-executorDeserializeTime += taskMetrics.executorDeserializeTime();
-executorRunTime += taskMetrics.executorRunTime();
-resultSize += taskMetrics.resultSize();
-jvmGCTime += taskMetrics.jvmGCTime();
-resultSerializationTime += taskMetrics.resultSerializationTime();
-memoryBytesSpilled += taskMetrics.memoryBytesSpilled();
-diskBytesSpilled += taskMetrics.diskBytesSpilled();
-if (!taskMetrics.inputMetrics().isEmpty()) {
-  inputMetricExist = true;
-  bytesRead += taskMetrics.inputMetrics().get().bytesRead();
-}
-Option shuffle
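
The replacement for the hand-rolled summation above funnels every TaskMetrics into a MetricsCollection and asks it for one aggregate, so the local and remote clients share the same roll-up logic. A sketch following the signatures shown in the diff:

import java.util.List;
import java.util.Map;

import org.apache.hive.spark.client.MetricsCollection;
import org.apache.hive.spark.client.metrics.Metrics;
import org.apache.spark.executor.TaskMetrics;

class MetricsRollup {
  static Metrics rollUp(int jobId, Map<String, List<TaskMetrics>> jobMetric) {
    MetricsCollection mc = new MetricsCollection();
    for (Map.Entry<String, List<TaskMetrics>> e : jobMetric.entrySet()) {
      for (TaskMetrics tm : e.getValue()) {
        // attempt id 0 is a simplification, mirroring the patch
        mc.addMetrics(jobId, Integer.parseInt(e.getKey()), 0, new Metrics(tm));
      }
    }
    return mc.getAllMetrics();  // one aggregate for the whole job
  }
}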

hive git commit: HIVE-12888: TestSparkNegativeCliDriver does not run in Spark mode [Spark Branch] (Chengxiang via Xuefu)

2016-01-26 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/spark 8e0a10c82 -> e07826041


HIVE-12888: TestSparkNegativeCliDriver does not run in Spark mode [Spark Branch] 
(Chengxiang via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e0782604
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e0782604
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e0782604

Branch: refs/heads/spark
Commit: e07826041e0326228ab4eeeaebe46625bbac3c99
Parents: 8e0a10c
Author: Xuefu Zhang <xzh...@cloudera.com>
Authored: Tue Jan 26 19:31:49 2016 -0800
Committer: Xuefu Zhang <xzh...@cloudera.com>
Committed: Tue Jan 26 19:31:49 2016 -0800

--
 ql/src/test/templates/TestNegativeCliDriver.vm | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/e0782604/ql/src/test/templates/TestNegativeCliDriver.vm
--
diff --git a/ql/src/test/templates/TestNegativeCliDriver.vm 
b/ql/src/test/templates/TestNegativeCliDriver.vm
index 5f8ee8e..2ea476f 100644
--- a/ql/src/test/templates/TestNegativeCliDriver.vm
+++ b/ql/src/test/templates/TestNegativeCliDriver.vm
@@ -41,13 +41,17 @@ public class $className extends TestCase {
 
   static {
 MiniClusterType miniMR = MiniClusterType.valueForString("$clusterMode");
+String hiveConfDir = "$hiveConfDir";
 String initScript = "$initScript";
 String cleanupScript = "$cleanupScript";
 
 try {
   String hadoopVer = "$hadoopVersion";
-  qt = new QTestUtil((HIVE_ROOT + "$resultsDir"), (HIVE_ROOT + "$logDir"), 
miniMR, hadoopVer,
-   initScript, cleanupScript);
+  if (!hiveConfDir.isEmpty()) {
+hiveConfDir = HIVE_ROOT + hiveConfDir;
+  }
+  qt = new QTestUtil((HIVE_ROOT + "$resultsDir"), (HIVE_ROOT + "$logDir"), 
miniMR,
+   hiveConfDir, hadoopVer, initScript, cleanupScript);
   // do a one time initialization
   qt.cleanUp();
   qt.createSources();



hive git commit: HIVE-12708: Hive on Spark doesn't work with Kerberized HBase [Spark Branch] (reviewed by Szehon)

2015-12-18 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/spark 9af0b27bd -> a116e96b7


HIVE-12708: Hive on Spark doesn't work with Kerberized HBase [Spark Branch] 
(reviewed by Szehon)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a116e96b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a116e96b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a116e96b

Branch: refs/heads/spark
Commit: a116e96b75998b5e8632c46678cd94c551fba78a
Parents: 9af0b27
Author: Xuefu Zhang <xzh...@cloudera.com>
Authored: Fri Dec 18 14:37:03 2015 -0800
Committer: Xuefu Zhang <xzh...@cloudera.com>
Committed: Fri Dec 18 14:37:03 2015 -0800

--
 .../hive/ql/exec/spark/HiveSparkClientFactory.java   | 11 +++
 1 file changed, 11 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/a116e96b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java
index ec0fdea..9b2dce3 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java
@@ -30,6 +30,7 @@ import org.apache.commons.compress.utils.CharsetNames;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.io.HiveKey;
@@ -67,6 +68,7 @@ public class HiveSparkClientFactory {
 
   public static Map<String, String> initiateSparkConf(HiveConf hiveConf) {
 Map<String, String> sparkConf = new HashMap<String, String>();
+HBaseConfiguration.addHbaseResources(hiveConf);
 
 // set default spark configurations.
 sparkConf.put("spark.master", SPARK_DEFAULT_MASTER);
@@ -139,7 +141,16 @@ public class HiveSparkClientFactory {
 if (value != null && !value.isEmpty()) {
   sparkConf.put("spark.hadoop." + propertyName, value);
 }
+  } else if (propertyName.startsWith("hbase")) {
+// Add HBase-related configuration to Spark because, in secure mode, Spark needs it
+// to generate an HBase delegation token. This is a temporary workaround for a Spark limitation.
+String value = hiveConf.get(propertyName);
+sparkConf.put("spark.hadoop." + propertyName, value);
+LOG.info(String.format(
+  "load HBase configuration (%s -> %s).", propertyName, value));
   }
+
   if (RpcConfiguration.HIVE_SPARK_RSC_CONFIGS.contains(propertyName)) {
 String value = RpcConfiguration.getValue(hiveConf, propertyName);
 sparkConf.put(propertyName, value);
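
The forwarding rule above generalizes: any hbase.* key in the Hive configuration is mirrored under the spark.hadoop. prefix so the Spark driver sees it and can request an HBase delegation token in secure mode. Reduced to its essence (a sketch, not the factory's full logic):

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;

class HBasePropForwarder {
  // Mirror every hbase.* key so the Spark driver can fetch an HBase
  // delegation token in secure mode.
  static Map<String, String> forward(Configuration hiveConf) {
    Map<String, String> sparkConf = new HashMap<String, String>();
    for (Map.Entry<String, String> e : hiveConf) {
      if (e.getKey().startsWith("hbase")) {
        sparkConf.put("spark.hadoop." + e.getKey(), e.getValue());
      }
    }
    return sparkConf;
  }
}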



hive git commit: HIVE-12538: After setting Spark-related config, SparkSession never gets reused (Nemon Lou via Xuefu)

2015-12-16 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 09b6f9a36 -> 305b8ce40


HIVE-12538: After setting Spark-related config, SparkSession never gets reused 
(Nemon Lou via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/305b8ce4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/305b8ce4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/305b8ce4

Branch: refs/heads/master
Commit: 305b8ce4097a692a2ee718b1df384d98d1e6fc1a
Parents: 09b6f9a
Author: Xuefu Zhang <xzh...@cloudera.com>
Authored: Wed Dec 16 08:31:27 2015 -0800
Committer: Xuefu Zhang <xzh...@cloudera.com>
Committed: Wed Dec 16 08:31:27 2015 -0800

--
 .../java/org/apache/hadoop/hive/conf/HiveConf.java|  4 +++-
 .../org/apache/hadoop/hive/conf/TestHiveConf.java | 14 ++
 .../hadoop/hive/ql/exec/spark/SparkUtilities.java |  6 +-
 3 files changed, 22 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/305b8ce4/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 243f281..b5aee00 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2836,7 +2836,9 @@ public class HiveConf extends Configuration {
   // When either name or value is null, the set method below will fail,
   // and throw IllegalArgumentException
   set(name, value);
-  isSparkConfigUpdated = isSparkRelatedConfig(name);
+  if (isSparkRelatedConfig(name)) {
+isSparkConfigUpdated = true;
+  }
 }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/305b8ce4/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java
--
diff --git a/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java 
b/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java
index 3b7a525..cd472c7 100644
--- a/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java
+++ b/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java
@@ -142,4 +142,18 @@ public class TestHiveConf {
 Assert.assertEquals("", conf2.get(HiveConf.ConfVars.METASTOREPWD.varname));
 Assert.assertEquals("", 
conf2.get(HiveConf.ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PASSWORD.varname));
   }
+
+  @Test
+  public void testSparkConfigUpdate(){
+HiveConf conf = new HiveConf();
+Assert.assertFalse(conf.getSparkConfigUpdated());
+
+conf.verifyAndSet("spark.master", "yarn-cluster");
+Assert.assertTrue(conf.getSparkConfigUpdated());
+conf.verifyAndSet("hive.execution.engine", "spark");
+Assert.assertTrue("Expected spark config updated.", 
conf.getSparkConfigUpdated());
+
+conf.setSparkConfigUpdated(false);
+Assert.assertFalse(conf.getSparkConfigUpdated());
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/305b8ce4/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java
index 0268469..a61cdc5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java
@@ -121,12 +121,16 @@ public class SparkUtilities {
   public static SparkSession getSparkSession(HiveConf conf,
   SparkSessionManager sparkSessionManager) throws HiveException {
 SparkSession sparkSession = SessionState.get().getSparkSession();
+HiveConf sessionConf = SessionState.get().getConf();
 
 // Spark configurations are updated; close the existing session
-if (conf.getSparkConfigUpdated()) {
+// In case of async queries or confOverlay is not empty,
+// sessionConf and conf are different objects
+if (sessionConf.getSparkConfigUpdated() || conf.getSparkConfigUpdated()) {
   sparkSessionManager.closeSession(sparkSession);
   sparkSession =  null;
   conf.setSparkConfigUpdated(false);
+  sessionConf.setSparkConfigUpdated(false);
 }
 sparkSession = sparkSessionManager.getSession(sparkSession, conf, true);
 SessionState.get().setSparkSession(sparkSession);
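
Consulting both conf objects matters because, for asynchronous queries or when a confOverlay is supplied, the per-query HiveConf is a distinct copy of the session conf; the updated-Spark-config flag can land on either one, and clearing both prevents the session from being needlessly torn down again on the next query.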



hive git commit: HIVE-12568: Provide an option to specify network interface used by Spark remote client [Spark Branch] (reviewed by Jimmy)

2015-12-07 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/spark e4b8cf43c -> 9af0b27bd


HIVE-12568: Provide an option to specify network interface used by Spark remote 
client [Spark Branch] (reviewed by Jimmy)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9af0b27b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9af0b27b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9af0b27b

Branch: refs/heads/spark
Commit: 9af0b27bda6352eb229058db57a25fe65eb81f9a
Parents: e4b8cf4
Author: xzhang 
Authored: Mon Dec 7 11:10:25 2015 -0800
Committer: xzhang 
Committed: Mon Dec 7 11:10:25 2015 -0800

--
 .../apache/hadoop/hive/common/ServerUtils.java  | 19 +++
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  5 ++
 .../service/cli/thrift/ThriftCLIService.java| 15 +++---
 .../hive/spark/client/rpc/RpcConfiguration.java | 57 +++-
 4 files changed, 50 insertions(+), 46 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/9af0b27b/common/src/java/org/apache/hadoop/hive/common/ServerUtils.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/common/ServerUtils.java 
b/common/src/java/org/apache/hadoop/hive/common/ServerUtils.java
index 83517ce..b44f92f 100644
--- a/common/src/java/org/apache/hadoop/hive/common/ServerUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/ServerUtils.java
@@ -18,6 +18,9 @@
 
 package org.apache.hadoop.hive.common;
 
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.fs.FileSystem;
@@ -47,4 +50,20 @@ public class ServerUtils {
 }
   }
 
+  /**
+   * Get the Inet address of the machine of the given host name.
+   * @param hostname The name of the host
+   * @return The network address of the host
+   * @throws UnknownHostException
+   */
+  public static InetAddress getHostAddress(String hostname) throws 
UnknownHostException {
+InetAddress serverIPAddress;
+if (hostname != null && !hostname.isEmpty()) {
+  serverIPAddress = InetAddress.getByName(hostname);
+} else {
+  serverIPAddress = InetAddress.getLocalHost();
+}
+return serverIPAddress;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/9af0b27b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 9e805bd..53ef428 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2387,6 +2387,11 @@ public class HiveConf extends Configuration {
   "Channel logging level for remote Spark driver.  One of {DEBUG, ERROR, 
INFO, TRACE, WARN}."),
 SPARK_RPC_SASL_MECHANISM("hive.spark.client.rpc.sasl.mechanisms", 
"DIGEST-MD5",
   "Name of the SASL mechanism to use for authentication."),
+SPARK_RPC_SERVER_ADDRESS("hive.spark.client.rpc.server.address", "",
+  "The server address of HiverServer2 host to be used for communication 
between Hive client and remote Spark driver. " + 
+  "Default is empty, which means the address will be determined in the 
same way as for hive.server2.thrift.bind.host." +
+  "This is only necessary if the host has mutiple network addresses and if 
a different network address other than " +
+  "hive.server2.thrift.bind.host is to be used."),
 SPARK_DYNAMIC_PARTITION_PRUNING(
 "hive.spark.dynamic.partition.pruning", false,
 "When dynamic pruning is enabled, joins on partition keys will be 
processed by writing\n" +

http://git-wip-us.apache.org/repos/asf/hive/blob/9af0b27b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
--
diff --git 
a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java 
b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
index 8434965..d54f12c 100644
--- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
+++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
@@ -35,6 +35,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.common.ServerUtils;
 import org.apache.hive.service.AbstractService;
 import org.apache.hive.service.ServiceException;
 import org.apache.hive.service.ServiceUtils;
@@ -160,21 +161,19 @@ public abstract class ThriftCLIService 
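
A usage sketch for the new helper: pick the RPC bind address from hive.spark.client.rpc.server.address when set, otherwise fall back to the Thrift bind host, and let getHostAddress() resolve it (an empty or null host resolves to the local host). Names here are illustrative:

import java.net.InetAddress;
import java.net.UnknownHostException;

import org.apache.hadoop.hive.common.ServerUtils;

class BindAddressExample {
  static InetAddress resolve(String rpcServerAddress, String thriftBindHost)
      throws UnknownHostException {
    String host = (rpcServerAddress == null || rpcServerAddress.isEmpty())
        ? thriftBindHost
        : rpcServerAddress;
    return ServerUtils.getHostAddress(host);  // empty/null host -> local host
  }
}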

hive git commit: HIVE-12554: Fix Spark branch build after merge [Spark Branch] (Rui via Xuefu)

2015-12-01 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/spark 79035f1c5 -> 1a87bcc0f


HIVE-12554: Fix Spark branch build after merge [Spark Branch] (Rui via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1a87bcc0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1a87bcc0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1a87bcc0

Branch: refs/heads/spark
Commit: 1a87bcc0f27e5a819035ac67fd68ace4c41301e9
Parents: 79035f1
Author: Xuefu Zhang <xzh...@cloudera.com>
Authored: Tue Dec 1 10:49:04 2015 -0800
Committer: Xuefu Zhang <xzh...@cloudera.com>
Committed: Tue Dec 1 10:49:04 2015 -0800

--
 .../apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java  | 3 ++-
 ql/src/test/results/clientpositive/gen_udf_example_add10.q.out| 1 +
 .../test/results/clientpositive/spark/gen_udf_example_add10.q.out | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/1a87bcc0/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java
index d215873..ec0fdea 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java
@@ -27,6 +27,7 @@ import java.util.Properties;
 import java.util.Set;
 
 import org.apache.commons.compress.utils.CharsetNames;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -133,7 +134,7 @@ public class HiveSparkClientFactory {
 LOG.info(String.format(
   "load yarn property from hive configuration in %s mode (%s -> %s).",
   sparkMaster, propertyName, value));
-  } else if (propertyName.equals(HiveConf.ConfVars.HADOOPFS.varname)) {
+  } else if 
(propertyName.equals(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY)) {
 String value = hiveConf.get(propertyName);
 if (value != null && !value.isEmpty()) {
   sparkConf.put("spark.hadoop." + propertyName, value);

http://git-wip-us.apache.org/repos/asf/hive/blob/1a87bcc0/ql/src/test/results/clientpositive/gen_udf_example_add10.q.out
--
diff --git a/ql/src/test/results/clientpositive/gen_udf_example_add10.q.out 
b/ql/src/test/results/clientpositive/gen_udf_example_add10.q.out
index 984554d..cab2ec8 100644
--- a/ql/src/test/results/clientpositive/gen_udf_example_add10.q.out
+++ b/ql/src/test/results/clientpositive/gen_udf_example_add10.q.out
@@ -43,6 +43,7 @@ STAGE PLANS:
 key expressions: _col0 (type: int), _col1 (type: double)
 sort order: -+
 Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE 
Column stats: NONE
+TopN Hash Memory Usage: 0.1
   Reduce Operator Tree:
 Select Operator
   expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 
(type: double)

http://git-wip-us.apache.org/repos/asf/hive/blob/1a87bcc0/ql/src/test/results/clientpositive/spark/gen_udf_example_add10.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/gen_udf_example_add10.q.out 
b/ql/src/test/results/clientpositive/spark/gen_udf_example_add10.q.out
index 05ec1f5..493d0a4 100644
--- a/ql/src/test/results/clientpositive/spark/gen_udf_example_add10.q.out
+++ b/ql/src/test/results/clientpositive/spark/gen_udf_example_add10.q.out
@@ -48,6 +48,7 @@ STAGE PLANS:
   key expressions: _col0 (type: int), _col1 (type: double)
   sort order: -+
   Statistics: Num rows: 2 Data size: 30 Basic stats: 
COMPLETE Column stats: NONE
+  TopN Hash Memory Usage: 0.1
 Reducer 2 
 Reduce Operator Tree:
   Select Operator



[3/3] hive git commit: HIVE-12184: DESCRIBE of fully qualified table fails when db and table name match and non-default database is in use (Naveen via Xuefu)

2015-11-30 Thread xuefu
HIVE-12184: DESCRIBE of fully qualified table fails when db and table name 
match and non-default database is in use (Naveen via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e9ca6870
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e9ca6870
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e9ca6870

Branch: refs/heads/master
Commit: e9ca6870df889e03e8fa6888d7fbb51c4fbaf20a
Parents: 3a17d42
Author: Xuefu Zhang <xzh...@cloudera.com>
Authored: Mon Nov 30 21:37:11 2015 -0800
Committer: Xuefu Zhang <xzh...@cloudera.com>
Committed: Mon Nov 30 21:37:11 2015 -0800

--
 .../hive/ql/parse/DDLSemanticAnalyzer.java  | 226 ---
 .../apache/hadoop/hive/ql/parse/HiveParser.g|  23 +-
 .../test/queries/clientnegative/desc_failure4.q |   5 +
 .../queries/clientnegative/describe_xpath1.q|   2 +-
 .../queries/clientnegative/describe_xpath2.q|   2 +-
 .../queries/clientnegative/describe_xpath3.q|   2 +-
 .../queries/clientnegative/describe_xpath4.q|   2 +-
 .../alter_partition_update_status.q |  12 +-
 .../alter_table_invalidate_column_stats.q   |  74 ++--
 .../clientpositive/alter_table_update_status.q  |  10 +-
 .../queries/clientpositive/analyze_tbl_part.q   |   8 +-
 .../queries/clientpositive/colstats_all_nulls.q |   4 +-
 .../clientpositive/columnstats_part_coltype.q   |  42 +-
 .../clientpositive/columnstats_partlvl.q|  12 +-
 .../clientpositive/columnstats_partlvl_dp.q |  20 +-
 .../queries/clientpositive/compustat_avro.q |   4 +-
 .../clientpositive/confirm_initial_tbl_stats.q  |  22 +-
 .../queries/clientpositive/describe_syntax.q|  10 +-
 .../queries/clientpositive/describe_table.q |  64 ++-
 .../queries/clientpositive/describe_xpath.q |  12 +-
 .../extrapolate_part_stats_full.q   |   2 +-
 .../extrapolate_part_stats_partial.q|   4 +-
 .../extrapolate_part_stats_partial_ndv.q|  44 +--
 .../clientpositive/partition_coltype_literals.q |   4 +-
 .../queries/clientpositive/stats_only_null.q|   2 +-
 .../results/clientnegative/desc_failure3.q.out  |   2 +-
 .../results/clientnegative/desc_failure4.q.out  |  21 +
 .../clientnegative/describe_xpath1.q.out|   2 +-
 .../clientnegative/describe_xpath2.q.out|   2 +-
 .../clientnegative/describe_xpath3.q.out|   2 +-
 .../clientnegative/describe_xpath4.q.out|   2 +-
 .../clientnegative/drop_database_cascade.q.out  |   2 +-
 .../alter_partition_update_status.q.out |  20 +-
 .../alter_table_invalidate_column_stats.q.out   | 144 +++
 .../alter_table_update_status.q.out |  20 +-
 .../results/clientpositive/ambiguitycheck.q.out |   4 +-
 .../clientpositive/analyze_tbl_part.q.out   |  12 +-
 .../clientpositive/colstats_all_nulls.q.out |   8 +-
 .../columnstats_part_coltype.q.out  |  84 ++--
 .../clientpositive/columnstats_partlvl.q.out|  24 +-
 .../clientpositive/columnstats_partlvl_dp.q.out |  40 +-
 .../results/clientpositive/compustat_avro.q.out |   8 +-
 .../confirm_initial_tbl_stats.q.out |  44 +--
 .../clientpositive/describe_syntax.q.out|  20 +-
 .../results/clientpositive/describe_table.q.out | 390 ++-
 .../results/clientpositive/describe_xpath.q.out |  24 +-
 .../extrapolate_part_stats_full.q.out   |   4 +-
 .../extrapolate_part_stats_partial.q.out|   8 +-
 .../extrapolate_part_stats_partial_ndv.q.out|  88 ++---
 .../clientpositive/llap/stats_only_null.q.out   |   4 +-
 .../partition_coltype_literals.q.out|   8 +-
 .../clientpositive/spark/stats_only_null.q.out  |   4 +-
 .../clientpositive/stats_only_null.q.out|   4 +-
 .../clientpositive/tez/stats_only_null.q.out|   4 +-
 54 files changed, 999 insertions(+), 612 deletions(-)
--
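The user-visible effect of the grammar change: DESCRIBE previously used a dot both for db.table qualification and for table.column references, which is ambiguous when a database and a table share a name. Column references now follow the table name after a space, e.g. "describe formatted src key" instead of "describe formatted src.key", as the updated golden files below show. A small JDBC sketch of the new syntax (connection URL, table, and column are placeholders, not part of the patch):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class DescribeSyntaxExample {
  public static void main(String[] args) throws Exception {
    try (Connection conn =
             DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
         Statement stmt = conn.createStatement()) {
      // Old, ambiguous form: DESCRIBE FORMATTED src.key
      // New form: the column name follows the table name after a space.
      try (ResultSet rs = stmt.executeQuery("DESCRIBE FORMATTED src key")) {
        while (rs.next()) {
          System.out.println(rs.getString(1));
        }
      }
    }
  }
}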


http://git-wip-us.apache.org/repos/asf/hive/blob/e9ca6870/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index eea2fcc..757542d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -1717,158 +1717,65 @@ public class DDLSemanticAnalyzer extends 
BaseSemanticAnalyzer {
   }
 }
 
-// assume the first component of DOT delimited name is tableName
-// get the attemptTableName
-static public String getAttemptTableName(Hive db, String qualifiedName, 
boolean isColumn)
-throws SemanticException {
-  // check whether the name starts with table
-  // DESCRIBE table
- 

[1/3] hive git commit: HIVE-12184: DESCRIBE of fully qualified table fails when db and table name match and non-default database is in use (Naveen via Xuefu)

2015-11-30 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master 3a17d4230 -> e9ca6870d


http://git-wip-us.apache.org/repos/asf/hive/blob/e9ca6870/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out
--
diff --git a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out 
b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out
index 3ef6bc0..f0d8ff2 100644
--- a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out
+++ b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out
@@ -8,10 +8,10 @@ key   string  default
 value  string  default 
 
  A masked pattern was here 
-PREHOOK: query: describe formatted src.key
+PREHOOK: query: describe formatted src key
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@src
-POSTHOOK: query: describe formatted src.key
+POSTHOOK: query: describe formatted src key
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@src
 # col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
@@ -27,10 +27,10 @@ key string  default
 value  string  default 
 
  A masked pattern was here 
-PREHOOK: query: describe formatted src1.value
+PREHOOK: query: describe formatted src1 value
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@src1
-POSTHOOK: query: describe formatted src1.value
+POSTHOOK: query: describe formatted src1 value
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@src1
 # col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
@@ -45,10 +45,10 @@ POSTHOOK: Input: default@src_json
 json   string  default 
 
  A masked pattern was here 
-PREHOOK: query: describe formatted src_json.json
+PREHOOK: query: describe formatted src_json json
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@src_json
-POSTHOOK: query: describe formatted src_json.json
+POSTHOOK: query: describe formatted src_json json
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@src_json
 # col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
@@ -64,10 +64,10 @@ key string  default
 value  string  default 
 
  A masked pattern was here 
-PREHOOK: query: describe formatted src_sequencefile.value
+PREHOOK: query: describe formatted src_sequencefile value
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@src_sequencefile
-POSTHOOK: query: describe formatted src_sequencefile.value
+POSTHOOK: query: describe formatted src_sequencefile value
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@src_sequencefile
 # col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
@@ -83,10 +83,10 @@ key int
 value  string  
 
  A masked pattern was here 
-PREHOOK: query: describe formatted srcbucket.value
+PREHOOK: query: describe formatted srcbucket value
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@srcbucket
-POSTHOOK: query: describe formatted srcbucket.value
+POSTHOOK: query: describe formatted srcbucket value
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@srcbucket
 # col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 
@@ -102,10 +102,10 @@ key   int
 value  string  
 
  A masked pattern was here 
-PREHOOK: query: describe formatted srcbucket2.value
+PREHOOK: query: describe formatted srcbucket2 value
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@srcbucket2
-POSTHOOK: query: describe formatted srcbucket2.value
+POSTHOOK: query: describe formatted srcbucket2 value
 POSTHOOK: type: DESCTABLE
 POSTHOOK: 

[2/3] hive git commit: HIVE-12184: DESCRIBE of fully qualified table fails when db and table name match and non-default database is in use (Naveen via Xuefu)

2015-11-30 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/e9ca6870/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats.q.out 
b/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats.q.out
index 6ae4f25..f3c10ee 100644
--- 
a/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats.q.out
+++ 
b/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats.q.out
@@ -199,55 +199,55 @@ POSTHOOK: Input: statsdb1@testpart1
 POSTHOOK: Input: statsdb1@testpart1@part=part1
 POSTHOOK: Input: statsdb1@testpart1@part=part2
  A masked pattern was here 
-PREHOOK: query: describe formatted statsdb1.testpart1 col1 partition (part = 
'part1')
+PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 
'part1') col1
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: statsdb1@testpart1
-POSTHOOK: query: describe formatted statsdb1.testpart1 col1 partition (part = 
'part1')
+POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 
'part1') col1
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: statsdb1@testpart1
 # col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 

 
 col1   int 27  484 
0   8   

from deserializer   
-PREHOOK: query: describe formatted statsdb1.testpart1 col2 partition (part = 
'part1')
+PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 
'part1') col2
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: statsdb1@testpart1
-POSTHOOK: query: describe formatted statsdb1.testpart1 col2 partition (part = 
'part1')
+POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 
'part1') col2
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: statsdb1@testpart1
 # col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 

 
 col2   string  
0   12  6.7 
7   
from deserializer   
-PREHOOK: query: describe formatted statsdb1.testpart1 col3 partition (part = 
'part1')
+PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 
'part1') col3
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: statsdb1@testpart1
-POSTHOOK: query: describe formatted statsdb1.testpart1 col3 partition (part = 
'part1')
+POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 
'part1') col3
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: statsdb1@testpart1
 # col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 

 
 col3   string  
0   1   4.0 
4   
from deserializer   
-PREHOOK: query: describe formatted statsdb1.testpart1 col1 partition (part = 
'part2')
+PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 
'part2') col1
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: statsdb1@testpart1
-POSTHOOK: query: describe formatted statsdb1.testpart1 col1 partition (part = 
'part2')
+POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 
'part2') col1
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: statsdb1@testpart1
 # col_name data_type   min max 
num_nulls   distinct_count  avg_col_len 
max_col_len num_trues   num_falses  
comment 

 
 col1   int 27  484 
0   18  

hive git commit: HIVE-12512: Include driver logs in execution-level Operation logs (Mohit via Xuefu)

2015-11-30 Thread xuefu
Repository: hive
Updated Branches:
  refs/heads/master e9ca6870d -> be410d24f


HIVE-12512: Include driver logs in execution-level Operation logs (Mohit via 
Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/be410d24
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/be410d24
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/be410d24

Branch: refs/heads/master
Commit: be410d24fe7e6598792b672d3fad950ed877a0b4
Parents: e9ca687
Author: Xuefu Zhang <xzh...@cloudera.com>
Authored: Mon Nov 30 21:40:50 2015 -0800
Committer: Xuefu Zhang <xzh...@cloudera.com>
Committed: Mon Nov 30 21:40:50 2015 -0800

--
 .../service/cli/operation/TestOperationLoggingAPIWithMr.java  | 7 ---
 .../service/cli/operation/TestOperationLoggingAPIWithTez.java | 6 +++---
 .../apache/hive/service/cli/operation/LogDivertAppender.java  | 3 ++-
 3 files changed, 9 insertions(+), 7 deletions(-)
--
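The substance of the change: Driver's log output now lands in the execution-level operation log instead of the verbose compile-time log, which is why messages such as "Semantic Analysis Completed" and "Starting command" move between the expected-log arrays in the tests below. LogDivertAppender routes an event by matching its logger name against a pattern built from a set of class names; a rough, self-contained sketch of that idea (not the appender's real code, and the class list is abbreviated):

import java.util.regex.Pattern;

public class LoggerNameFilterSketch {
  // Logger names whose events should be diverted into the operation log.
  private static final Pattern INCLUDED = Pattern.compile(String.join("|",
      "org.apache.hadoop.hive.ql.Driver",     // the addition this patch makes
      "org.apache.hadoop.hive.ql.exec.Task"));

  static boolean divert(String loggerName) {
    return INCLUDED.matcher(loggerName).matches();
  }

  public static void main(String[] args) {
    System.out.println(divert("org.apache.hadoop.hive.ql.Driver")); // true
    System.out.println(divert("org.apache.zookeeper.ClientCnxn"));  // false
  }
}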


http://git-wip-us.apache.org/repos/asf/hive/blob/be410d24/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithMr.java
--
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithMr.java
 
b/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithMr.java
index 0155b75..d21571e 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithMr.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithMr.java
@@ -36,7 +36,7 @@ import org.junit.Test;
  * TestOperationLoggingAPIWithMr
  * Test the FetchResults of TFetchType.LOG in thrift level in MR mode.
  */
-public class TestOperationLoggingAPIWithMr extends OperationLoggingAPITestBase{
+public class TestOperationLoggingAPIWithMr extends OperationLoggingAPITestBase 
{
 
   @BeforeClass
   public static void setUpBeforeClass() throws Exception {
@@ -45,10 +45,11 @@ public class TestOperationLoggingAPIWithMr extends 
OperationLoggingAPITestBase{
   "Parsing command",
   "Parse Completed",
   "Starting Semantic Analysis",
-  "Semantic Analysis Completed",
-  "Starting command"
 };
 expectedLogsExecution = new String[]{
+  "Total jobs",
+  "Starting command",
+  "Semantic Analysis Completed",
   "Number of reduce tasks determined at compile time",
   "number of splits",
   "Submitting tokens for job",

http://git-wip-us.apache.org/repos/asf/hive/blob/be410d24/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java
--
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java
 
b/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java
index ab29861..3ffc3a4 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java
@@ -20,11 +20,11 @@ public class TestOperationLoggingAPIWithTez extends 
OperationLoggingAPITestBase
 expectedLogsVerbose = new String[]{
   "Parsing command",
   "Parse Completed",
-  "Starting Semantic Analysis",
-  "Semantic Analysis Completed",
-  "Starting command"
+  "Starting Semantic Analysis"
 };
 expectedLogsExecution = new String[]{
+  "Starting command",
+  "Semantic Analysis Completed",
   "Executing on YARN cluster with App id",
   "Setting Tez DAG access"
 };

http://git-wip-us.apache.org/repos/asf/hive/blob/be410d24/service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java
--
diff --git 
a/service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java 
b/service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java
index 9cb6439..7531778 100644
--- 
a/service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java
+++ 
b/service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java
@@ -23,6 +23,7 @@ import java.io.OutputStreamWriter;
 import java.io.Serializable;
 import java.util.regex.Pattern;
 
+import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.

[83/91] [abbrv] hive git commit: HIVE-12307 - Streaming API TransactionBatch.close() must abort any remaining transactions in the batch (Eugene Koifman, reviewed by Alan Gates)

2015-11-29 Thread xuefu
HIVE-12307 - Streaming API TransactionBatch.close() must abort any remaining 
transactions in the batch (Eugene Koifman, reviewed by Alan Gates)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f1ac5a39
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f1ac5a39
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f1ac5a39

Branch: refs/heads/spark
Commit: f1ac5a391a18fccf724249038fca73e7b55854e2
Parents: 6d4dfa4
Author: Eugene Koifman 
Authored: Thu Nov 26 11:48:03 2015 -0800
Committer: Eugene Koifman 
Committed: Thu Nov 26 11:48:29 2015 -0800

--
 .../streaming/AbstractRecordWriter.java |  32 ++-
 .../hcatalog/streaming/ConnectionError.java |   3 +-
 .../streaming/DelimitedInputWriter.java |   2 +-
 .../hive/hcatalog/streaming/HiveEndPoint.java   | 211 +--
 .../hcatalog/streaming/StrictJsonWriter.java|   2 +-
 .../hcatalog/streaming/TransactionBatch.java|   1 +
 .../hcatalog/streaming/TransactionError.java|   2 +-
 .../hive/hcatalog/streaming/TestStreaming.java  | 167 +++
 .../hadoop/hive/ql/io/orc/OrcRecordUpdater.java |   3 +
 9 files changed, 344 insertions(+), 79 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/f1ac5a39/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java
--
diff --git 
a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java
 
b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java
index 5c15675..0c6b9ea 100644
--- 
a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java
+++ 
b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java
@@ -65,6 +65,8 @@ public abstract class AbstractRecordWriter implements 
RecordWriter {
 
  final AcidOutputFormat<?,?> outf;
   private Object[] bucketFieldData; // Pre-allocated in constructor. Updated 
on each write.
+  private Long curBatchMinTxnId;
+  private Long curBatchMaxTxnId;
 
   protected AbstractRecordWriter(HiveEndPoint endPoint, HiveConf conf)
   throws ConnectionError, StreamingException {
@@ -98,6 +100,12 @@ public abstract class AbstractRecordWriter implements 
RecordWriter {
 }
   }
 
+  /**
+   * used to tag error msgs to provide some breadcrumbs
+   */
+  String getWatermark() {
+return partitionPath + " txnIds[" + curBatchMinTxnId + "," + 
curBatchMaxTxnId + "]";
+  }
   // return the column numbers of the bucketed columns
  private List<Integer> getBucketColIDs(List<String> bucketCols, 
List<FieldSchema> cols) {
 ArrayList<Integer> result = new ArrayList<Integer>(bucketCols.size());
@@ -164,22 +172,32 @@ public abstract class AbstractRecordWriter implements 
RecordWriter {
   throws StreamingIOFailure, SerializationError {
 try {
   LOG.debug("Creating Record updater");
+  curBatchMinTxnId = minTxnId;
+  curBatchMaxTxnId = maxTxnID;
   updaters = createRecordUpdaters(totalBuckets, minTxnId, maxTxnID);
 } catch (IOException e) {
-  LOG.error("Failed creating record updater", e);
-  throw new StreamingIOFailure("Unable to get new record Updater", e);
+  String errMsg = "Failed creating RecordUpdaterS for " + getWatermark();
+  LOG.error(errMsg, e);
+  throw new StreamingIOFailure(errMsg, e);
 }
   }
 
   @Override
   public void closeBatch() throws StreamingIOFailure {
-try {
-  for (RecordUpdater updater : updaters) {
+boolean haveError = false;
+for (RecordUpdater updater : updaters) {
+  try {
+//try not to leave any files open
 updater.close(false);
   }
-  updaters.clear();
-} catch (IOException e) {
-  throw new StreamingIOFailure("Unable to close recordUpdater", e);
+  catch(Exception ex) {
+haveError = true;
+LOG.error("Unable to close " + updater + " due to: " + 
ex.getMessage(), ex);
+  }
+}
+updaters.clear();
+if(haveError) {
+  throw new StreamingIOFailure("Encountered errors while closing (see 
logs) " + getWatermark());
 }
   }
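The closeBatch() rewrite above trades fail-fast closing for best-effort closing: every RecordUpdater gets a close attempt even when an earlier one throws, failures are logged as they happen, and one summary exception is raised at the end so no files are left open silently. A generic sketch of the same pattern (illustrative only; uses java.io.Closeable in place of Hive's RecordUpdater):

import java.io.Closeable;
import java.io.IOException;
import java.util.List;

public class BestEffortClose {
  // Close everything, remember whether anything failed, then report once.
  static void closeAll(List<? extends Closeable> resources) throws IOException {
    boolean haveError = false;
    for (Closeable c : resources) {
      try {
        c.close();
      } catch (Exception ex) {
        haveError = true;
        System.err.println("Unable to close " + c + ": " + ex.getMessage());
      }
    }
    resources.clear();
    if (haveError) {
      throw new IOException("Encountered errors while closing (see logs)");
    }
  }
}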
 

http://git-wip-us.apache.org/repos/asf/hive/blob/f1ac5a39/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/ConnectionError.java
--
diff --git 
a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/ConnectionError.java
 
b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/ConnectionError.java
index ffa51c9..03f6a44 100644
--- 
a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/ConnectionError.java
+++ 

[08/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out 
b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
index 2c14065..fa80956 100644
--- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
+++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
@@ -134,10 +134,14 @@ STAGE PLANS:
   Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
-Spark HashTable Sink Operator
-  keys:
-0 key (type: int)
-1 key (type: int)
+Select Operator
+  expressions: key (type: int), value (type: string)
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
+  Spark HashTable Sink Operator
+keys:
+  0 _col0 (type: int)
+  1 _col0 (type: int)
 Local Work:
   Map Reduce Local Work
 
@@ -153,27 +157,31 @@ STAGE PLANS:
   Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
-Map Join Operator
-  condition map:
-   Inner Join 0 to 1
-  keys:
-0 key (type: int)
-1 key (type: int)
-  outputColumnNames: _col0, _col1, _col7
-  input vertices:
-0 Map 1
-  Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-expressions: _col0 (type: int), _col1 (type: string), 
_col7 (type: string)
-outputColumnNames: _col0, _col1, _col2
+Select Operator
+  expressions: key (type: int), value (type: string)
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+  Map Join Operator
+condition map:
+ Inner Join 0 to 1
+keys:
+  0 _col0 (type: int)
+  1 _col0 (type: int)
+outputColumnNames: _col0, _col1, _col3
+input vertices:
+  0 Map 1
 Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-File Output Operator
-  compressed: false
+Select Operator
+  expressions: _col0 (type: int), _col1 (type: 
string), _col3 (type: string)
+  outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 275 Data size: 2921 Basic 
stats: COMPLETE Column stats: NONE
-  table:
-  input format: 
org.apache.hadoop.mapred.TextInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+  File Output Operator
+compressed: false
+Statistics: Num rows: 275 Data size: 2921 Basic 
stats: COMPLETE Column stats: NONE
+table:
+input format: 
org.apache.hadoop.mapred.TextInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Local Work:
   Map Reduce Local Work
 
@@ -211,10 +219,14 @@ STAGE PLANS:
   Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
-Spark HashTable Sink Operator
-  keys:
-0 _col0 (type: int)
-1 key (type: int)
+Select Operator
+  expressions: key (type: int)
+  

[25/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out 
b/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out
index 8e7078f..f6323f2 100644
--- a/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out
@@ -128,11 +128,15 @@ STAGE PLANS:
 Map 1 
 Map Operator Tree:
 TableScan
-  alias: s3
+  alias: s1
   Statistics: Num rows: 242 Data size: 2566 Basic stats: 
COMPLETE Column stats: NONE
   Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
+Select Operator
+  expressions: key (type: int)
+  outputColumnNames: _col0
+  Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
 Map Operator Tree:
 TableScan
   alias: s1
@@ -140,22 +144,26 @@ STAGE PLANS:
   Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
-Merge Join Operator
-  condition map:
-   Inner Join 0 to 1
-  keys:
-0 key (type: int)
-1 key (type: int)
-  Statistics: Num rows: 133 Data size: 1411 Basic stats: 
COMPLETE Column stats: NONE
-  Group By Operator
-aggregations: count()
-mode: hash
-outputColumnNames: _col0
-Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  sort order: 
+Select Operator
+  expressions: key (type: int)
+  outputColumnNames: _col0
+  Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
+  Merge Join Operator
+condition map:
+ Inner Join 0 to 1
+keys:
+  0 _col0 (type: int)
+  1 _col0 (type: int)
+Statistics: Num rows: 133 Data size: 1411 Basic stats: 
COMPLETE Column stats: NONE
+Group By Operator
+  aggregations: count()
+  mode: hash
+  outputColumnNames: _col0
   Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-  value expressions: _col0 (type: bigint)
+  Reduce Output Operator
+sort order: 
+Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+value expressions: _col0 (type: bigint)
 Execution mode: llap
 Reducer 2 
 Execution mode: uber
@@ -203,14 +211,32 @@ STAGE PLANS:
   Stage: Stage-1
 Tez
   Edges:
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
-Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+Reducer 5 <- Map 4 (SIMPLE_EDGE)
  A masked pattern was here 
   Vertices:
 Map 1 
 Map Operator Tree:
 TableScan
+  alias: vt1
+  Statistics: Num rows: 242 Data size: 2566 Basic stats: 
COMPLETE Column stats: NONE
+  Filter Operator
+predicate: key is not null (type: boolean)
+Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
+Select Operator
+  expressions: key (type: int)
+  outputColumnNames: _col0
+  Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
+  Reduce Output Operator
+key expressions: _col0 (type: int)
+sort order: +
+Map-reduce partition columns: _col0 (type: int)
+Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
+Execution mode: 

[03/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out 
b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out
index 180787b..b1850b6 100644
--- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out
@@ -81,12 +81,16 @@ STAGE PLANS:
   Filter Operator
 predicate: p_name is not null (type: boolean)
 Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: p_name (type: string)
-  sort order: +
-  Map-reduce partition columns: p_name (type: string)
+Select Operator
+  expressions: p_partkey (type: int), p_name (type: 
string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), 
p_size (type: int), p_container (type: string), p_retailprice (type: double), 
p_comment (type: string)
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
   Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
-  value expressions: p_partkey (type: int), p_mfgr (type: 
string), p_brand (type: string), p_type (type: string), p_size (type: int), 
p_container (type: string), p_retailprice (type: double), p_comment (type: 
string)
+  Reduce Output Operator
+key expressions: _col1 (type: string)
+sort order: +
+Map-reduce partition columns: _col1 (type: string)
+Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+value expressions: _col0 (type: int), _col2 (type: 
string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 
(type: string), _col7 (type: double), _col8 (type: string)
 Map 3 
 Map Operator Tree:
 TableScan
@@ -95,12 +99,16 @@ STAGE PLANS:
   Filter Operator
 predicate: p2_name is not null (type: boolean)
 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-Reduce Output Operator
-  key expressions: p2_name (type: string)
-  sort order: +
-  Map-reduce partition columns: p2_name (type: string)
+Select Operator
+  expressions: p2_partkey (type: int), p2_name (type: 
string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: 
string), p2_size (type: int), p2_container (type: string), p2_retailprice 
(type: double), p2_comment (type: string)
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
   Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE
-  value expressions: p2_partkey (type: int), p2_mfgr 
(type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: 
int), p2_container (type: string), p2_retailprice (type: double), p2_comment 
(type: string)
+  Reduce Output Operator
+key expressions: _col1 (type: string)
+sort order: +
+Map-reduce partition columns: _col1 (type: string)
+Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE
+value expressions: _col0 (type: int), _col2 (type: 
string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 
(type: string), _col7 (type: double), _col8 (type: string)
 Map 4 
 Map Operator Tree:
 TableScan
@@ -109,12 +117,16 @@ STAGE PLANS:
   Filter Operator
 predicate: p3_name is not null (type: boolean)
 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-Reduce Output Operator
-  key expressions: p3_name (type: string)
-  sort order: +
-  Map-reduce partition columns: p3_name (type: string)
+Select Operator
+  expressions: p3_partkey (type: int), p3_name (type: 
string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: 
string), p3_size (type: int), p3_container (type: string), p3_retailprice 
(type: double), p3_comment (type: string)
+  

[64/91] [abbrv] hive git commit: HIVE-12331 : Remove hive.enforce.bucketing & hive.enforce.sorting configs (Ashutosh Chauhan via Jason Dere)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out 
b/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out
index eeb18b0..93a7ca4 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out
@@ -139,6 +139,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
 Spark
+  Edges:
+Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1)
  A masked pattern was here 
   Vertices:
 Map 1 
@@ -165,37 +167,14 @@ STAGE PLANS:
 expressions: _col0 (type: int), _col7 (type: string)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 825 Data size: 8764 Basic stats: 
COMPLETE Column stats: NONE
-File Output Operator
-  compressed: false
-  GlobalTableId: 1
- A masked pattern was here 
-  NumFilesPerFileSink: 1
-  Static Partition Specification: ds=1/
+Reduce Output Operator
+  key expressions: _col0 (type: int)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: int)
   Statistics: Num rows: 825 Data size: 8764 Basic 
stats: COMPLETE Column stats: NONE
- A masked pattern was here 
-  table:
-  input format: 
org.apache.hadoop.mapred.TextInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-  properties:
-SORTBUCKETCOLSPREFIX TRUE
-bucket_count 16
-bucket_field_name key
-columns key,value
-columns.comments 
-columns.types int:string
- A masked pattern was here 
-name default.test_table3
-partition_columns ds
-partition_columns.types string
-serialization.ddl struct test_table3 { i32 
key, string value}
-serialization.format 1
-serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- A masked pattern was here 
-  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-  name: default.test_table3
-  TotalFiles: 1
-  GatherStats: true
-  MultiFileSpray: false
+  tag: -1
+  value expressions: _col1 (type: string)
+  auto parallelism: false
 Path -> Alias:
  A masked pattern was here 
 Path -> Partition:
@@ -249,6 +228,44 @@ STAGE PLANS:
   name: default.test_table1
 Truncated Path -> Alias:
   /test_table1/ds=1 [a]
+Reducer 2 
+Needs Tagging: false
+Reduce Operator Tree:
+  Select Operator
+expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 
(type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 825 Data size: 8764 Basic stats: 
COMPLETE Column stats: NONE
+File Output Operator
+  compressed: false
+  GlobalTableId: 1
+ A masked pattern was here 
+  NumFilesPerFileSink: 16
+  Static Partition Specification: ds=1/
+  Statistics: Num rows: 825 Data size: 8764 Basic stats: 
COMPLETE Column stats: NONE
+ A masked pattern was here 
+  table:
+  input format: org.apache.hadoop.mapred.TextInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+  properties:
+SORTBUCKETCOLSPREFIX TRUE
+bucket_count 16
+bucket_field_name key
+columns key,value
+columns.comments 
+columns.types int:string
+ A masked pattern was here 
+name default.test_table3
+partition_columns ds
+partition_columns.types string
+serialization.ddl struct test_table3 { i32 key, string 
value}
+   

[50/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/auto_join26.q.out
--
diff --git a/ql/src/test/results/clientpositive/auto_join26.q.out 
b/ql/src/test/results/clientpositive/auto_join26.q.out
index 94ab76f..5f9531b 100644
--- a/ql/src/test/results/clientpositive/auto_join26.q.out
+++ b/ql/src/test/results/clientpositive/auto_join26.q.out
@@ -28,11 +28,11 @@ STAGE PLANS:
   Stage: Stage-6
 Map Reduce Local Work
   Alias -> Map Local Tables:
-$hdt$_0:$hdt$_1:x 
+$hdt$_0:$hdt$_0:x 
   Fetch Operator
 limit: -1
   Alias -> Map Local Operator Tree:
-$hdt$_0:$hdt$_1:x 
+$hdt$_0:$hdt$_0:x 
   TableScan
 alias: x
 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
@@ -67,24 +67,20 @@ STAGE PLANS:
   keys:
 0 _col0 (type: string)
 1 _col0 (type: string)
-  outputColumnNames: _col1
+  outputColumnNames: _col0
   Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-expressions: _col1 (type: string)
-outputColumnNames: _col0
+  Group By Operator
+aggregations: count(1)
+keys: _col0 (type: string)
+mode: hash
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-Group By Operator
-  aggregations: count(1)
-  keys: _col0 (type: string)
-  mode: hash
-  outputColumnNames: _col0, _col1
+Reduce Output Operator
+  key expressions: _col0 (type: string)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: string)
   Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-  Reduce Output Operator
-key expressions: _col0 (type: string)
-sort order: +
-Map-reduce partition columns: _col0 (type: string)
-Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-value expressions: _col1 (type: bigint)
+  value expressions: _col1 (type: bigint)
   Local Work:
 Map Reduce Local Work
   Reduce Operator Tree:

http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/auto_join32.q.out
--
diff --git a/ql/src/test/results/clientpositive/auto_join32.q.out 
b/ql/src/test/results/clientpositive/auto_join32.q.out
index 161ab6b..9b32047 100644
--- a/ql/src/test/results/clientpositive/auto_join32.q.out
+++ b/ql/src/test/results/clientpositive/auto_join32.q.out
@@ -35,21 +35,25 @@ STAGE PLANS:
   Stage: Stage-5
 Map Reduce Local Work
   Alias -> Map Local Tables:
-s 
+$hdt$_0:s 
   Fetch Operator
 limit: -1
   Alias -> Map Local Operator Tree:
-s 
+$hdt$_0:s 
   TableScan
 alias: s
 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
 Filter Operator
   predicate: name is not null (type: boolean)
   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-  HashTable Sink Operator
-keys:
-  0 name (type: string)
-  1 name (type: string)
+  Select Operator
+expressions: name (type: string)
+outputColumnNames: _col0
+Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
+HashTable Sink Operator
+  keys:
+0 _col0 (type: string)
+1 _col0 (type: string)
 
   Stage: Stage-2
 Map Reduce
@@ -60,25 +64,29 @@ STAGE PLANS:
 Filter Operator
   predicate: name is not null (type: boolean)
   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-  Map Join Operator
-condition map:
- Inner Join 0 to 1
-keys:
-  0 name (type: string)
-  1 name (type: string)
-outputColumnNames: _col0, _col8
+  Select Operator
+expressions: name (type: string), registration (type: string)
+outputColumnNames: _col0, 

[13/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out
--
diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out 
b/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out
index c0a8959..441338e 100644
--- a/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out
+++ b/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out
@@ -46,6 +46,9 @@ POSTHOOK: query: load data local inpath 
'../../data/files/smbbucket_3.rc' overwr
 POSTHOOK: type: LOAD
  A masked pattern was here 
 POSTHOOK: Output: default@smb_bucket_3
+Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[22][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Stage-4:MAPRED' is a cross product
 PREHOOK: query: explain 
 select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key 
= b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c 
join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) 
where t2.key=5
 PREHOOK: type: QUERY
@@ -68,123 +71,107 @@ STAGE PLANS:
 Filter Operator
   predicate: (key = 5) (type: boolean)
   Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-key expressions: 5 (type: int)
-sort order: +
-Map-reduce partition columns: 5 (type: int)
+  Select Operator
 Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE 
Column stats: NONE
+Reduce Output Operator
+  sort order: 
+  Statistics: Num rows: 26 Data size: 104 Basic stats: 
COMPLETE Column stats: NONE
   TableScan
 alias: b
 Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE 
Column stats: NONE
 Filter Operator
-  predicate: (key = 5) (type: boolean)
+  predicate: (5 = key) (type: boolean)
   Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-key expressions: 5 (type: int)
-sort order: +
-Map-reduce partition columns: 5 (type: int)
+  Select Operator
 Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+Reduce Output Operator
+  sort order: 
+  Statistics: Num rows: 25 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
   Reduce Operator Tree:
 Join Operator
   condition map:
Inner Join 0 to 1
   keys:
-0 key (type: int)
-1 key (type: int)
+0 
+1 
   Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column 
stats: NONE
-  Select Operator
-expressions: 5 (type: int)
-outputColumnNames: _col0
-Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE 
Column stats: NONE
-File Output Operator
-  compressed: false
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+  File Output Operator
+compressed: false
+table:
+input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
 Map Reduce
   Map Operator Tree:
   TableScan
 Reduce Output Operator
-  key expressions: _col0 (type: int)
-  sort order: +
-  Map-reduce partition columns: _col0 (type: int)
+  sort order: 
   Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE 
Column stats: NONE
   TableScan
 Reduce Output Operator
-  key expressions: _col0 (type: int)
-  sort order: +
-  Map-reduce partition columns: _col0 (type: int)
+  sort order: 
   Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE 
Column stats: NONE
   Reduce Operator Tree:
 Join Operator
   condition map:
-   Left Outer Join0 to 1
+   Inner Join 0 to 1
   keys:
-0 _col0 (type: int)
-1 _col0 (type: int)
-  

[63/91] [abbrv] hive git commit: HIVE-9599 : remove derby, datanucleus and other not related to jdbc client classes from hive-jdbc-standalone.jar (Ashutosh Chauhan via Thejas Nair)

2015-11-29 Thread xuefu
HIVE-9599 : remove derby, datanucleus and other not related to jdbc client 
classes from hive-jdbc-standalone.jar (Ashutosh Chauhan via Thejas Nair)

Signed-off-by: Ashutosh Chauhan 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1b6600de
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1b6600de
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1b6600de

Branch: refs/heads/spark
Commit: 1b6600de0f908170061628a6b5ed4f072012cc96
Parents: b7281ce
Author: Ashutosh Chauhan 
Authored: Wed Nov 18 15:00:30 2015 -0800
Committer: Ashutosh Chauhan 
Committed: Tue Nov 24 15:06:23 2015 -0800

--
 jdbc/pom.xml | 52 
 1 file changed, 52 insertions(+)
--
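A quick way to sanity-check the effect of these exclusions is to scan the standalone jar for entries from the newly excluded groups; a throwaway sketch (the jar path and the prefix list are placeholders, not part of the patch):

import java.util.jar.JarEntry;
import java.util.jar.JarFile;

public class ShadedJarCheck {
  public static void main(String[] args) throws Exception {
    // Package prefixes that should no longer appear in the shaded jar.
    String[] banned = {"org/apache/derby/", "org/datanucleus/", "jline/"};
    try (JarFile jar = new JarFile("hive-jdbc-standalone.jar")) {
      jar.stream().map(JarEntry::getName).forEach(name -> {
        for (String prefix : banned) {
          if (name.startsWith(prefix)) {
            System.out.println("unexpected entry: " + name);
          }
        }
      });
    }
  }
}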


http://git-wip-us.apache.org/repos/asf/hive/blob/1b6600de/jdbc/pom.xml
--
diff --git a/jdbc/pom.xml b/jdbc/pom.xml
index ea961a4..f8d7dfd 100644
--- a/jdbc/pom.xml
+++ b/jdbc/pom.xml
@@ -189,6 +189,58 @@
                  <exclude>org.antlr:*</exclude>
                  <exclude>org.slf4j:slf4j-log4j12</exclude>
                  <exclude>log4j:*</exclude>
+                  <exclude>antlr:*</exclude>
+                  <exclude>aopalliance:*</exclude>
+                  <exclude>asm:*</exclude>
+                  <exclude>com.google.code.gson:*</exclude>
+                  <exclude>com.google.inject:*</exclude>
+                  <exclude>com.google.inject.extensions:*</exclude>
+                  <exclude>com.jamesmurty.utils:*</exclude>
+                  <exclude>com.jcraft:*</exclude>
+                  <exclude>com.jolbox:*</exclude>
+                  <exclude>commons-beanutils:*</exclude>
+                  <exclude>commons-cli:*</exclude>
+                  <exclude>commons-dbcp:*</exclude>
+                  <exclude>commons-digester:*</exclude>
+                  <exclude>commons-el:*</exclude>
+                  <exclude>commons-httpclient:*</exclude>
+                  <exclude>commons-io:*</exclude>
+                  <exclude>commons-net:*</exclude>
+                  <exclude>commons-pool:*</exclude>
+                  <exclude>com.google.code.findbugs:*</exclude>
+                  <exclude>com.google.protobuf:*</exclude>
+                  <exclude>com.sun.xml.bind:*</exclude>
+                  <exclude>com.thoughtworks.paranamer:*</exclude>
+                  <exclude>com.twitter:*</exclude>
+                  <exclude>io.netty:*</exclude>
+                  <exclude>javax.activation:*</exclude>
+                  <exclude>javax.inject:*</exclude>
+                  <exclude>javax.jdo:*</exclude>
+                  <exclude>javax.mail:*</exclude>
+                  <exclude>javax.servlet:*</exclude>
+                  <exclude>javax.servlet.jsp:*</exclude>
+                  <exclude>javax.transaction:*</exclude>
+                  <exclude>javax.xml.bind:*</exclude>
+                  <exclude>javax.xml.stream:*</exclude>
+                  <exclude>jline:*</exclude>
+                  <exclude>joda-time:*</exclude>
+                  <exclude>net.java.dev.jets3t:*</exclude>
+                  <exclude>org.apache.commons:commons-math3</exclude>
+                  <exclude>org.apache.curator:*</exclude>
+                  <exclude>org.apache.derby:*</exclude>
+                  <exclude>org.apache.directory.api:*</exclude>
+                  <exclude>org.apache.directory.server:*</exclude>
+                  <exclude>org.apache.geronimo.specs:*</exclude>
+                  <exclude>org.apache.zookeeper:*</exclude>
+                  <exclude>org.codehaus.jackson:*</exclude>
+                  <exclude>org.codehaus.jettison:*</exclude>
+                  <exclude>org.datanucleus:*</exclude>
+                  <exclude>org.fusesource.leveldbjni:*</exclude>
+                  <exclude>org.htrace:*</exclude>
+                  <exclude>org.mortbay.jetty:*</exclude>
+                  <exclude>org.xerial.snappy:*</exclude>
+                  <exclude>tomcat:*</exclude>
+                  <exclude>xmlenc:*</exclude>
                </excludes>
              </artifactSet>
 



[12/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out 
b/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out
index d72b4f3..71fe68e 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out
@@ -50,10 +50,14 @@ STAGE PLANS:
   Filter Operator
 predicate: UDFToDouble(key) is not null (type: boolean)
 Statistics: Num rows: 3 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
-Spark HashTable Sink Operator
-  keys:
-0 (_col0 + _col5) (type: double)
-1 UDFToDouble(key) (type: double)
+Select Operator
+  expressions: key (type: string)
+  outputColumnNames: _col0
+  Statistics: Num rows: 3 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+  Spark HashTable Sink Operator
+keys:
+  0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: 
double)
+  1 UDFToDouble(_col0) (type: double)
 Local Work:
   Map Reduce Local Work
 
@@ -71,24 +75,32 @@ STAGE PLANS:
   Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: key (type: string)
-  sort order: +
-  Map-reduce partition columns: key (type: string)
+Select Operator
+  expressions: key (type: string)
+  outputColumnNames: _col0
   Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+  Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
 Map 3 
 Map Operator Tree:
 TableScan
-  alias: src2
+  alias: src1
   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
   Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: key (type: string)
-  sort order: +
-  Map-reduce partition columns: key (type: string)
+Select Operator
+  expressions: key (type: string)
+  outputColumnNames: _col0
   Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+  Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
 Reducer 2 
 Local Work:
   Map Reduce Local Work
@@ -97,34 +109,30 @@ STAGE PLANS:
 condition map:
  Inner Join 0 to 1
 keys:
-  0 key (type: string)
-  1 key (type: string)
-outputColumnNames: _col0, _col5
+  0 _col0 (type: string)
+  1 _col0 (type: string)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
 Filter Operator
-  predicate: (_col0 + _col5) is not null (type: boolean)
+  predicate: (UDFToDouble(_col0) + UDFToDouble(_col1)) is not 
null (type: boolean)
   Statistics: Num rows: 138 Data size: 1465 Basic stats: 
COMPLETE Column stats: NONE
   Map Join Operator
 condition map:
  Inner Join 0 to 1
 keys:
-  0 (_col0 + _col5) (type: double)
-  1 UDFToDouble(key) (type: double)
-outputColumnNames: _col0, _col5, _col10
+  0 (UDFToDouble(_col0) 

[20/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/multiMapJoin2.q.out
--
diff --git a/ql/src/test/results/clientpositive/multiMapJoin2.q.out 
b/ql/src/test/results/clientpositive/multiMapJoin2.q.out
index 46b717f..dee81c2 100644
--- a/ql/src/test/results/clientpositive/multiMapJoin2.q.out
+++ b/ql/src/test/results/clientpositive/multiMapJoin2.q.out
@@ -2079,21 +2079,25 @@ STAGE PLANS:
   Stage: Stage-5
 Map Reduce Local Work
   Alias -> Map Local Tables:
-y 
+$hdt$_1:y 
   Fetch Operator
 limit: -1
   Alias -> Map Local Operator Tree:
-y 
+$hdt$_1:y 
   TableScan
 alias: y
 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
 Filter Operator
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE 
Column stats: NONE
-  HashTable Sink Operator
-keys:
-  0 key (type: string)
-  1 key (type: string)
+  Select Operator
+expressions: key (type: string)
+outputColumnNames: _col0
+Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE 
Column stats: NONE
+HashTable Sink Operator
+  keys:
+0 _col0 (type: string)
+1 _col0 (type: string)
 
   Stage: Stage-2
 Map Reduce
@@ -2104,22 +2108,26 @@ STAGE PLANS:
 Filter Operator
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 63 Data size: 635 Basic stats: COMPLETE 
Column stats: NONE
-  Map Join Operator
-condition map:
- Inner Join 0 to 1
-keys:
-  0 key (type: string)
-  1 key (type: string)
-Statistics: Num rows: 69 Data size: 698 Basic stats: COMPLETE 
Column stats: NONE
-Group By Operator
-  aggregations: count()
-  mode: hash
-  outputColumnNames: _col0
-  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-sort order: 
+  Select Operator
+expressions: key (type: string)
+outputColumnNames: _col0
+Statistics: Num rows: 63 Data size: 635 Basic stats: COMPLETE 
Column stats: NONE
+Map Join Operator
+  condition map:
+   Inner Join 0 to 1
+  keys:
+0 _col0 (type: string)
+1 _col0 (type: string)
+  Statistics: Num rows: 69 Data size: 698 Basic stats: 
COMPLETE Column stats: NONE
+  Group By Operator
+aggregations: count()
+mode: hash
+outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-value expressions: _col0 (type: bigint)
+Reduce Output Operator
+  sort order: 
+  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+  value expressions: _col0 (type: bigint)
   Local Work:
 Map Reduce Local Work
   Reduce Operator Tree:

http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/multi_join_union.q.out
--
diff --git a/ql/src/test/results/clientpositive/multi_join_union.q.out 
b/ql/src/test/results/clientpositive/multi_join_union.q.out
index 466f34b..76c837f 100644
--- a/ql/src/test/results/clientpositive/multi_join_union.q.out
+++ b/ql/src/test/results/clientpositive/multi_join_union.q.out
@@ -53,36 +53,40 @@ src12 b ON (a.key = b.key) JOIN
 (SELECT * FROM (SELECT * FROM src13 UNION ALL SELECT * FROM src14)a )c ON 
c.value = b.value
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-7 is a root stage
-  Stage-5 depends on stages: Stage-7
-  Stage-0 depends on stages: Stage-5
+  Stage-8 is a root stage
+  Stage-6 depends on stages: Stage-8
+  Stage-0 depends on stages: Stage-6
 
 STAGE PLANS:
-  Stage: Stage-7
+  Stage: Stage-8
 Map Reduce Local Work
   Alias -> Map Local Tables:
-a 
+$hdt$_0:a 
   Fetch Operator
 limit: -1
-c-subquery1:a-subquery1:src13 
+$hdt$_2-subquery1:$hdt$_2-subquery1:src13 
   Fetch Operator
 limit: -1
-c-subquery2:a-subquery2:src14 
+$hdt$_2-subquery2:$hdt$_2-subquery2:src14 
   Fetch Operator
 limit: -1
   

[47/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out
--
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out 
b/ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out
index 853f641..0c8aa21 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out
@@ -103,35 +103,43 @@ STAGE PLANS:
 Filter Operator
   predicate: (key is not null and value is not null) (type: 
boolean)
   Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
-  Sorted Merge Bucket Map Join Operator
-condition map:
- Inner Join 0 to 1
-keys:
-  0 key (type: int)
-  1 key (type: int)
-outputColumnNames: _col1
-Reduce Output Operator
-  key expressions: _col1 (type: string)
-  sort order: +
-  Map-reduce partition columns: _col1 (type: string)
+  Select Operator
+expressions: key (type: int), value (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
+Sorted Merge Bucket Map Join Operator
+  condition map:
+   Inner Join 0 to 1
+  keys:
+0 _col0 (type: int)
+1 _col0 (type: int)
+  outputColumnNames: _col1
+  Reduce Output Operator
+key expressions: _col1 (type: string)
+sort order: +
+Map-reduce partition columns: _col1 (type: string)
   TableScan
 alias: c
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
 Filter Operator
   predicate: value is not null (type: boolean)
   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-key expressions: value (type: string)
-sort order: +
-Map-reduce partition columns: value (type: string)
+  Select Operator
+expressions: value (type: string)
+outputColumnNames: _col0
 Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+Reduce Output Operator
+  key expressions: _col0 (type: string)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: string)
+  Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
   Reduce Operator Tree:
 Join Operator
   condition map:
Inner Join 0 to 1
   keys:
 0 _col1 (type: string)
-1 value (type: string)
+1 _col0 (type: string)
   Group By Operator
 aggregations: count()
 mode: hash
@@ -206,35 +214,43 @@ STAGE PLANS:
 Filter Operator
   predicate: (key is not null and value is not null) (type: 
boolean)
   Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
-  Sorted Merge Bucket Map Join Operator
-condition map:
- Inner Join 0 to 1
-keys:
-  0 key (type: int)
-  1 key (type: int)
-outputColumnNames: _col1
-Reduce Output Operator
-  key expressions: _col1 (type: string)
-  sort order: +
-  Map-reduce partition columns: _col1 (type: string)
+  Select Operator
+expressions: key (type: int), value (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
+Sorted Merge Bucket Map Join Operator
+  condition map:
+   Inner Join 0 to 1
+  keys:
+0 _col0 (type: int)
+1 _col0 (type: int)
+  outputColumnNames: _col1
+  Reduce Output Operator
+key expressions: _col1 (type: string)
+sort order: +
+Map-reduce partition columns: _col1 (type: string)
   TableScan
 alias: d
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
 Filter Operator
   predicate: value is not null (type: 

[89/91] [abbrv] hive git commit: HIVE-12338: Add webui to HiveServer2 (Jimmy, reviewed by Mohit, Szehon, Lefty)

2015-11-29 Thread xuefu
HIVE-12338: Add webui to HiveServer2 (Jimmy, reviewed by Mohit, Szehon, Lefty)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2c0c191c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2c0c191c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2c0c191c

Branch: refs/heads/spark
Commit: 2c0c191cdd6b2d1aebe4502e24cc2b3d041bf3ca
Parents: a51e5d4
Author: Jimmy Xiang 
Authored: Thu Nov 19 08:10:29 2015 -0800
Committer: Jimmy Xiang 
Committed: Sun Nov 29 09:55:09 2015 -0800

--
 common/pom.xml  |   5 +
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   5 +
 .../hive/http/AdminAuthorizedServlet.java   |  45 ++
 .../java/org/apache/hive/http/ConfServlet.java  | 101 +
 .../java/org/apache/hive/http/HttpServer.java   | 316 ++
 .../org/apache/hive/http/JMXJsonServlet.java| 412 +++
 pom.xml |   1 +
 ql/pom.xml  |   6 +
 service/pom.xml |  56 +++
 .../hive/service/cli/operation/Operation.java   |   2 +-
 .../service/cli/operation/OperationManager.java |  26 +-
 .../service/cli/operation/SQLOperation.java |   8 +-
 .../service/cli/session/HiveSessionBase.java|   4 +
 .../service/cli/session/HiveSessionImpl.java|  12 +
 .../service/cli/session/SessionManager.java |  16 +-
 .../apache/hive/service/server/HiveServer2.java |  47 +++
 .../hive-webapps/hiveserver2/hiveserver2.jsp| 186 +
 .../hive-webapps/hiveserver2/index.html |  20 +
 .../static/css/bootstrap-theme.min.css  |  10 +
 .../hive-webapps/static/css/bootstrap.min.css   |   9 +
 .../resources/hive-webapps/static/css/hive.css  |  24 ++
 .../fonts/glyphicons-halflings-regular.eot  | Bin 0 -> 14079 bytes
 .../fonts/glyphicons-halflings-regular.svg  | 228 ++
 .../fonts/glyphicons-halflings-regular.ttf  | Bin 0 -> 29512 bytes
 .../fonts/glyphicons-halflings-regular.woff | Bin 0 -> 16448 bytes
 .../hive-webapps/static/hive_logo.jpeg  | Bin 0 -> 5616 bytes
 spark-client/pom.xml|   6 +
 27 files changed, 1529 insertions(+), 16 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/2c0c191c/common/pom.xml
--
diff --git a/common/pom.xml b/common/pom.xml
index ee74282..72bb550 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -56,6 +56,11 @@
       <version>${commons-lang.version}</version>
     </dependency>
     <dependency>
+      <groupId>org.eclipse.jetty.aggregate</groupId>
+      <artifactId>jetty-all</artifactId>
+      <version>${jetty.version}</version>
+    </dependency>
+    <dependency>
       <groupId>joda-time</groupId>
       <artifactId>joda-time</artifactId>
       <version>${joda.version}</version>

http://git-wip-us.apache.org/repos/asf/hive/blob/2c0c191c/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index db942b0..9e805bd 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1847,6 +1847,11 @@ public class HiveConf extends Configuration {
 HIVE_SERVER2_PARALLEL_COMPILATION("hive.driver.parallel.compilation", 
false, "Whether to\n" +
 "enable parallel compilation between sessions on HiveServer2. The 
default is false."),
 
+// HiveServer2 WebUI
+HIVE_SERVER2_WEBUI_BIND_HOST("hive.server2.webui.host", "0.0.0.0", "The 
host address the HiveServer2 WebUI will listen on"),
+HIVE_SERVER2_WEBUI_PORT("hive.server2.webui.port", 10002, "The port the 
HiveServer2 WebUI will listen on"),
+HIVE_SERVER2_WEBUI_MAX_THREADS("hive.server2.webui.max.threads", 50, "The 
max HiveServer2 WebUI threads"),
+
 // Tez session settings
 HIVE_SERVER2_TEZ_DEFAULT_QUEUES("hive.server2.tez.default.queues", "",
 "A list of comma separated values corresponding to YARN queues of the 
same name.\n" +
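
A minimal sketch (not part of the patch; the demo class and main method are
illustrative only) of how server code would read the three new WebUI settings
through the standard HiveConf accessors:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class WebUiConfDemo {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // The three variables added above, read via the usual accessors.
        String host = conf.getVar(HiveConf.ConfVars.HIVE_SERVER2_WEBUI_BIND_HOST);
        int port = conf.getIntVar(HiveConf.ConfVars.HIVE_SERVER2_WEBUI_PORT);
        int threads = conf.getIntVar(HiveConf.ConfVars.HIVE_SERVER2_WEBUI_MAX_THREADS);
        System.out.println("WebUI at " + host + ":" + port
            + " (max " + threads + " threads)");
      }
    }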

http://git-wip-us.apache.org/repos/asf/hive/blob/2c0c191c/common/src/java/org/apache/hive/http/AdminAuthorizedServlet.java
--
diff --git a/common/src/java/org/apache/hive/http/AdminAuthorizedServlet.java 
b/common/src/java/org/apache/hive/http/AdminAuthorizedServlet.java
new file mode 100644
index 000..5d957c2
--- /dev/null
+++ b/common/src/java/org/apache/hive/http/AdminAuthorizedServlet.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * 

[56/91] [abbrv] hive git commit: HIVE-12456: QueryId can't be stored in the configuration of the SessionState since multiple queries can run in a single session (Aihua Xu, reviewed by Mohit)

2015-11-29 Thread xuefu
HIVE-12456: QueryId can't be stored in the configuration of the SessionState 
since multiple queries can run in a single session (Aihua Xu, reviewed by Mohit)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2604cf26
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2604cf26
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2604cf26

Branch: refs/heads/spark
Commit: 2604cf26ae36c4211bf155e2032398cc7344f641
Parents: f90d798
Author: Aihua Xu 
Authored: Mon Nov 23 12:20:39 2015 -0500
Committer: Aihua Xu 
Committed: Mon Nov 23 12:20:39 2015 -0500

--
 .../cli/operation/ExecuteStatementOperation.java | 15 +--
 .../hive/service/cli/operation/Operation.java| 19 +++
 .../hive/service/cli/operation/SQLOperation.java |  4 ++--
 .../service/cli/session/HiveSessionImpl.java |  1 -
 4 files changed, 18 insertions(+), 21 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/2604cf26/service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java
--
diff --git 
a/service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java
 
b/service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java
index 3f2de10..b3d9b52 100644
--- 
a/service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java
+++ 
b/service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java
@@ -18,7 +18,6 @@
 package org.apache.hive.service.cli.operation;
 
 import java.sql.SQLException;
-import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.hadoop.hive.ql.processors.CommandProcessor;
@@ -29,13 +28,11 @@ import org.apache.hive.service.cli.session.HiveSession;
 
 public abstract class ExecuteStatementOperation extends Operation {
   protected String statement = null;
-  protected Map<String, String> confOverlay = new HashMap<String, String>();
 
   public ExecuteStatementOperation(HiveSession parentSession, String statement,
       Map<String, String> confOverlay, boolean runInBackground) {
-super(parentSession, OperationType.EXECUTE_STATEMENT, runInBackground);
+super(parentSession, confOverlay, OperationType.EXECUTE_STATEMENT, 
runInBackground);
 this.statement = statement;
-setConfOverlay(confOverlay);
   }
 
   public String getStatement() {
@@ -57,14 +54,4 @@ public abstract class ExecuteStatementOperation extends 
Operation {
 }
 return new HiveCommandOperation(parentSession, statement, processor, 
confOverlay);
   }
-
-  protected Map<String, String> getConfOverlay() {
-    return confOverlay;
-  }
-
-  protected void setConfOverlay(Map<String, String> confOverlay) {
-    if (confOverlay != null) {
-      this.confOverlay = confOverlay;
-    }
-  }
 }
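
In short, the overlay becomes a per-Operation field populated once through the
base constructor. A toy model (hypothetical names, heavily simplified) of the
bug this removes:

    import java.util.HashMap;
    import java.util.Map;

    public class ConfOverlayDemo {
      public static void main(String[] args) {
        // Old shape: one map per session, so a second concurrent query
        // clobbers the first query's id.
        Map<String, String> sessionConf = new HashMap<String, String>();
        sessionConf.put("queryId", "query-1");
        sessionConf.put("queryId", "query-2");

        // New shape: one map per operation, so each query keeps its own id.
        Map<String, String> op1 = new HashMap<String, String>();
        op1.put("queryId", "query-1");
        Map<String, String> op2 = new HashMap<String, String>();
        op2.put("queryId", "query-2");

        System.out.println(sessionConf.get("queryId"));  // query-2 for both
        System.out.println(op1.get("queryId") + " / " + op2.get("queryId"));
      }
    }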

http://git-wip-us.apache.org/repos/asf/hive/blob/2604cf26/service/src/java/org/apache/hive/service/cli/operation/Operation.java
--
diff --git 
a/service/src/java/org/apache/hive/service/cli/operation/Operation.java 
b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
index d13415e..25cefc2 100644
--- a/service/src/java/org/apache/hive/service/cli/operation/Operation.java
+++ b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
@@ -21,11 +21,14 @@ import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 
 import com.google.common.collect.Sets;
+
 import org.apache.hadoop.hive.common.metrics.common.Metrics;
 import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
 import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
@@ -50,8 +53,8 @@ import org.apache.logging.log4j.ThreadContext;
 
 public abstract class Operation {
   // Constants of the key strings for the log4j ThreadContext.
-  private static final String QUERYID = "QueryId";
-  private static final String SESSIONID = "SessionId";
+  public static final String SESSIONID_LOG_KEY = "sessionId";
+  public static final String QUERYID_LOG_KEY = "queryId";
 
   protected final HiveSession parentSession;
   private OperationState state = OperationState.INITIALIZED;
@@ -67,6 +70,7 @@ public abstract class Operation {
  protected volatile Future<?> backgroundHandle;
   protected OperationLog operationLog;
   protected boolean isOperationLogEnabled;
+  protected Map<String, String> confOverlay = new HashMap<String, String>();
 
   private long operationTimeout;
   private long lastAccessTime;
@@ -75,7 

[55/91] [abbrv] hive git commit: HIVE-12409 make sure SessionState.initTxnMgr() is thread safe (Eugene Koifman, reviewed by Jason Dere)

2015-11-29 Thread xuefu
HIVE-12409 make sure SessionState.initTxnMgr() is thread safe (Eugene Koifman, 
reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f90d798e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f90d798e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f90d798e

Branch: refs/heads/spark
Commit: f90d798e830d56745c8bc0cfee35741ed66aab90
Parents: 695d905
Author: Eugene Koifman 
Authored: Mon Nov 23 08:20:06 2015 -0800
Committer: Eugene Koifman 
Committed: Mon Nov 23 08:20:06 2015 -0800

--
 ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/f90d798e/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java 
b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index ff875df..5c69fb6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -400,7 +400,7 @@ public class SessionState {
* @return transaction manager
* @throws LockException
*/
-  public HiveTxnManager initTxnMgr(HiveConf conf) throws LockException {
+  public synchronized HiveTxnManager initTxnMgr(HiveConf conf) throws 
LockException {
 if (txnMgr == null) {
   txnMgr = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf);
 }
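
The fix is the classic guarded lazy-initialization pattern. A self-contained
toy (simplified types, not the real classes) of the race it closes:

    public class LazyInitDemo {
      static class TxnManager {}

      private TxnManager mgr;

      // Without 'synchronized', two threads sharing one session can both see
      // mgr == null and build two managers; with it, at most one is created.
      public synchronized TxnManager initTxnMgr() {
        if (mgr == null) {
          mgr = new TxnManager();
        }
        return mgr;
      }

      public static void main(String[] args) throws InterruptedException {
        final LazyInitDemo session = new LazyInitDemo();
        Runnable r = new Runnable() {
          public void run() { session.initTxnMgr(); }
        };
        Thread t1 = new Thread(r);
        Thread t2 = new Thread(r);
        t1.start(); t2.start();
        t1.join(); t2.join();
        System.out.println(session.initTxnMgr());  // the single shared instance
      }
    }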



[86/91] [abbrv] hive git commit: HIVE-12008: Hive queries failing when using count(*) on column in view (Yongzhi Chen, reviewed by Szehon Ho)

2015-11-29 Thread xuefu
HIVE-12008: Hive queries failing when using count(*) on column in view (Yongzhi 
Chen, reviewed by Szehon Ho)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a51e5d4e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a51e5d4e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a51e5d4e

Branch: refs/heads/spark
Commit: a51e5d4e261d6eb334497f768059829e36a99fd3
Parents: 7984738
Author: Yongzhi Chen 
Authored: Sun Nov 29 01:06:22 2015 -0500
Committer: Yongzhi Chen 
Committed: Sun Nov 29 01:06:22 2015 -0500

--
 .../hadoop/hive/ql/exec/UnionOperator.java  |   8 +-
 .../hive/ql/optimizer/ColumnPrunerProcCtx.java  |   2 +-
 .../clientpositive/unionall_unbalancedppd.q |  57 ++-
 .../results/clientpositive/spark/union16.q.out  |  16 +-
 .../results/clientpositive/spark/union2.q.out   |  16 +-
 .../results/clientpositive/spark/union9.q.out   |  16 +-
 .../clientpositive/spark/union_view.q.out   |  24 --
 .../results/clientpositive/tez/union2.q.out |  28 +-
 .../results/clientpositive/tez/union9.q.out |  40 +--
 .../tez/vector_null_projection.q.out|   4 -
 .../test/results/clientpositive/union16.q.out   | 354 ---
 ql/src/test/results/clientpositive/union2.q.out |  32 +-
 ql/src/test/results/clientpositive/union9.q.out |  46 ++-
 .../results/clientpositive/union_view.q.out |  24 --
 .../clientpositive/unionall_unbalancedppd.q.out | 261 +++---
 .../clientpositive/vector_null_projection.q.out |   4 -
 16 files changed, 388 insertions(+), 544 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/a51e5d4e/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java
index a49097c..ddb23ee 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java
@@ -62,13 +62,16 @@ public class UnionOperator extends Operator<UnionDesc> 
implements Serializable {
 int parents = parentOperators.size();
 parentObjInspectors = new StructObjectInspector[parents];
 parentFields = new List[parents];
+int columns = 0;
 for (int p = 0; p < parents; p++) {
   parentObjInspectors[p] = (StructObjectInspector) inputObjInspectors[p];
   parentFields[p] = parentObjInspectors[p].getAllStructFieldRefs();
+  if (p == 0 || parentFields[p].size() < columns) {
+columns = parentFields[p].size();
+  }
 }
 
 // Get columnNames from the first parent
-int columns = parentFields[0].size();
 ArrayList<String> columnNames = new ArrayList<String>(columns);
 for (int c = 0; c < columns; c++) {
   columnNames.add(parentFields[0].get(c).getFieldName());
@@ -81,7 +84,8 @@ public class UnionOperator extends Operator<UnionDesc> 
implements Serializable {
 }
 
 for (int p = 0; p < parents; p++) {
-  assert (parentFields[p].size() == columns);
+  //When columns is 0, the union operator is empty.
+  assert (columns == 0 || parentFields[p].size() == columns);
   for (int c = 0; c < columns; c++) {
 if (!columnTypeResolvers[c].updateForUnionAll(parentFields[p].get(c)
 .getFieldObjectInspector())) {
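
The effect is that a union's output width becomes the minimum of its parents'
widths. A standalone sketch (simplified to string lists; the real code walks
ObjectInspector field refs) of the new rule:

    import java.util.Arrays;
    import java.util.List;

    public class MinColumnsDemo {
      // Mirrors the patched loop: take the smallest parent schema, so a
      // pruned view branch projecting fewer columns than its siblings no
      // longer trips the per-parent column-count assertion.
      static int unionColumns(List<List<String>> parentFields) {
        int columns = 0;
        for (int p = 0; p < parentFields.size(); p++) {
          if (p == 0 || parentFields.get(p).size() < columns) {
            columns = parentFields.get(p).size();
          }
        }
        return columns;
      }

      public static void main(String[] args) {
        List<List<String>> parents = Arrays.asList(
            Arrays.asList("key", "value"),
            Arrays.<String>asList());              // fully pruned branch
        System.out.println(unionColumns(parents)); // 0 instead of an assert
      }
    }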

http://git-wip-us.apache.org/repos/asf/hive/blob/a51e5d4e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
index b18a034..7befd3b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
@@ -108,7 +108,7 @@ public class ColumnPrunerProcCtx implements 
NodeProcessorCtx {
 prunList = joinPrunedColLists.get(child).get((byte) tag);
   } else if (child instanceof UnionOperator) {
 List<Integer> positions = unionPrunedColLists.get(child);
-if (positions != null && positions.size() > 0) {
+if (positions != null) {
   prunList = new ArrayList<>();
   RowSchema oldRS = curOp.getSchema();
   for (Integer pos : positions) {

http://git-wip-us.apache.org/repos/asf/hive/blob/a51e5d4e/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q
--
diff --git a/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q 
b/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q
index 360ad11..a704860 100644
--- 

[05/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out 
b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
index 8d3f0d8..6c57ff2 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
@@ -2748,20 +2748,24 @@ STAGE PLANS:
 isSamplingPred: false
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 3 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
-Group By Operator
-  aggregations: count(1)
-  keys: key (type: string)
-  mode: final
-  outputColumnNames: _col0, _col1
-  Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
-  Reduce Output Operator
-key expressions: _col0 (type: string)
-sort order: +
-Map-reduce partition columns: _col0 (type: string)
+Select Operator
+  expressions: key (type: string)
+  outputColumnNames: _col0
+  Statistics: Num rows: 3 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+  Group By Operator
+aggregations: count(1)
+keys: _col0 (type: string)
+mode: final
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
-tag: 0
-value expressions: _col1 (type: bigint)
-auto parallelism: false
+Reduce Output Operator
+  key expressions: _col0 (type: string)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: string)
+  Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+  tag: 0
+  value expressions: _col1 (type: bigint)
+  auto parallelism: false
 Path -> Alias:
  A masked pattern was here 
 Path -> Partition:
@@ -2825,20 +2829,24 @@ STAGE PLANS:
 isSamplingPred: false
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 3 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
-Group By Operator
-  aggregations: count(1)
-  keys: key (type: string)
-  mode: final
-  outputColumnNames: _col0, _col1
-  Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
-  Reduce Output Operator
-key expressions: _col0 (type: string)
-sort order: +
-Map-reduce partition columns: _col0 (type: string)
+Select Operator
+  expressions: key (type: string)
+  outputColumnNames: _col0
+  Statistics: Num rows: 3 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+  Group By Operator
+aggregations: count(1)
+keys: _col0 (type: string)
+mode: final
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
-tag: 1
-value expressions: _col1 (type: bigint)
-auto parallelism: false
+Reduce Output Operator
+  key expressions: _col0 (type: string)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: string)
+  Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
+  tag: 1
+  value expressions: _col1 (type: bigint)
+  auto parallelism: false
 Path -> Alias:
  A masked pattern was here 
 Path -> Partition:
@@ -3113,20 +3121,24 @@ STAGE PLANS:
 isSamplingPred: false
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 3 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
-

[82/91] [abbrv] hive git commit: HIVE-12496 : Open ServerTransport After MetaStore Initialization (Nemon Lou via Ashutosh Chauhan)

2015-11-29 Thread xuefu
HIVE-12496 : Open ServerTransport After MetaStore Initialization  (Nemon Lou 
via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6d4dfa40
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6d4dfa40
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6d4dfa40

Branch: refs/heads/spark
Commit: 6d4dfa40bc6a70c7b4d7de0241b1868937d66b00
Parents: a9d3b09
Author: Nemon Lou 
Authored: Thu Nov 26 11:44:34 2015 -0800
Committer: Ashutosh Chauhan 
Committed: Thu Nov 26 11:44:34 2015 -0800

--
 .../java/org/apache/hadoop/hive/metastore/HiveMetaStore.java| 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/6d4dfa40/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
--
diff --git 
a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java 
b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index a835f6a..00602e1 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -5978,8 +5978,6 @@ public class HiveMetaStore extends ThriftHiveMetastore {
   boolean useCompactProtocol = 
conf.getBoolVar(ConfVars.METASTORE_USE_THRIFT_COMPACT_PROTOCOL);
   useSasl = conf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL);
 
-  TServerTransport serverTransport = tcpKeepAlive ?
-  new TServerSocketKeepAlive(port) : new TServerSocket(port);
 
   TProcessor processor;
   TTransportFactory transFactory;
@@ -6027,6 +6025,9 @@ public class HiveMetaStore extends ThriftHiveMetastore {
   LOG.info("Starting DB backed MetaStore Server");
 }
   }
+ 
+   TServerTransport serverTransport = tcpKeepAlive ?
+new TServerSocketKeepAlive(port) : new TServerSocket(port);
 
   TThreadPoolServer.Args args = new TThreadPoolServer.Args(serverTransport)
   .processor(processor)
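
The substance of the change is ordering: finish every piece of metastore
initialization first and bind the listening socket last, so a client can never
reach a half-initialized server. A hedged sketch of that ordering (the init
helper is hypothetical; the Thrift calls are real):

    import org.apache.thrift.transport.TServerSocket;
    import org.apache.thrift.transport.TServerTransport;
    import org.apache.thrift.transport.TTransportException;

    public class StartupOrderDemo {
      // Stand-in for processor/transport-factory construction and the
      // DB-backed setup that the patch moves ahead of the bind.
      static void initializeStore() { /* expensive setup happens here */ }

      public static void main(String[] args) throws TTransportException {
        initializeStore();                             // 1. finish all init
        TServerTransport t = new TServerSocket(9083);  // 2. only then bind
        System.out.println("listening on 9083: " + t);
      }
    }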



[60/91] [abbrv] hive git commit: HIVE-12411: Remove counter based stats collection mechanism (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
HIVE-12411: Remove counter based stats collection mechanism (Pengcheng Xiong, 
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ab98ffc2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ab98ffc2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ab98ffc2

Branch: refs/heads/spark
Commit: ab98ffc2688abbc75de13524ca46848e566354ef
Parents: 306a640
Author: Pengcheng Xiong 
Authored: Tue Nov 24 17:09:40 2015 +0800
Committer: Pengcheng Xiong 
Committed: Tue Nov 24 17:09:40 2015 +0800

--
 .../hadoop/hive/common/StatsSetupConst.java |  13 -
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   8 +-
 data/conf/llap/hive-site.xml|   2 +-
 data/conf/spark/standalone/hive-site.xml|   2 +-
 data/conf/spark/yarn-client/hive-site.xml   |   2 +-
 data/conf/tez/hive-site.xml |   4 +-
 .../hive/ql/stats/CounterStatsAggregator.java   |  82 
 .../ql/stats/CounterStatsAggregatorSpark.java   |  58 ---
 .../ql/stats/CounterStatsAggregatorTez.java |  79 
 .../hive/ql/stats/CounterStatsPublisher.java|  66 ---
 .../hadoop/hive/ql/stats/StatsFactory.java  |  11 -
 .../test/queries/clientpositive/index_bitmap3.q |   1 -
 .../queries/clientpositive/index_bitmap_auto.q  |   1 -
 .../test/queries/clientpositive/stats_counter.q |  16 -
 .../clientpositive/stats_counter_partitioned.q  |  45 --
 .../clientpositive/llap/stats_counter.q.out | 102 
 .../llap/stats_counter_partitioned.q.out| 465 ---
 .../clientpositive/spark/stats_counter.q.out| 102 
 .../spark/stats_counter_partitioned.q.out   | 465 ---
 .../results/clientpositive/stats_counter.q.out  | 102 
 .../stats_counter_partitioned.q.out | 465 ---
 .../clientpositive/tez/metadataonly1.q.out  |  72 +--
 .../clientpositive/tez/optimize_nullscan.q.out  |  90 ++--
 .../clientpositive/tez/stats_counter.q.out  | 102 
 .../tez/stats_counter_partitioned.q.out | 465 ---
 25 files changed, 88 insertions(+), 2732 deletions(-)
--
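
With the counter mechanism removed, hive.stats.dbclass accepts only "fs" and
"custom", as the HiveConf hunk below shows. A standalone sketch (illustrative
class, not from the patch) of pinning the filesystem collector
programmatically:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class StatsDbClassDemo {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // "counter" is no longer in this variable's PatternSet validator;
        // "fs" (the default) and "custom" are the remaining choices.
        conf.setVar(HiveConf.ConfVars.HIVESTATSDBCLASS, "fs");
        System.out.println(conf.getVar(HiveConf.ConfVars.HIVESTATSDBCLASS));
      }
    }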


http://git-wip-us.apache.org/repos/asf/hive/blob/ab98ffc2/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java 
b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
index 0a44bde..2ff76ee 100644
--- a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
+++ b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
@@ -31,19 +31,6 @@ import java.util.Map;
 public class StatsSetupConst {
 
   public enum StatDB {
-counter {
-  @Override
-  public String getPublisher(Configuration conf) {
-return "org.apache.hadoop.hive.ql.stats.CounterStatsPublisher"; }
-  @Override
-  public String getAggregator(Configuration conf) {
-if (HiveConf.getVar(conf, 
HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
-  return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregatorTez";
-} else if (HiveConf.getVar(conf, 
HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
-  return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregatorSpark";
-}
-return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregator"; }
-},
 fs {
   @Override
   public String getPublisher(Configuration conf) {

http://git-wip-us.apache.org/repos/asf/hive/blob/ab98ffc2/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index f48403b..fffedd9 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1354,10 +1354,10 @@ public class HiveConf extends Configuration {
 // Statistics
 HIVESTATSAUTOGATHER("hive.stats.autogather", true,
 "A flag to gather statistics automatically during the INSERT OVERWRITE 
command."),
-HIVESTATSDBCLASS("hive.stats.dbclass", "fs", new PatternSet("counter", 
"custom", "fs"),
+HIVESTATSDBCLASS("hive.stats.dbclass", "fs", new PatternSet("custom", 
"fs"),
 "The storage that stores temporary Hive statistics. In filesystem 
based statistics collection ('fs'), \n" +
 "each task writes statistics it has collected in a file on the 
filesystem, which will be aggregated \n" +
-"after the job has finished. Supported values are fs (filesystem), 
counter, and custom as defined in 

[07/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out 
b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out
index bcef03c..d8ade07 100644
--- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out
@@ -113,10 +113,14 @@ STAGE PLANS:
   Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 5 Data size: 35 Basic stats: 
COMPLETE Column stats: NONE
-Spark HashTable Sink Operator
-  keys:
-0 key (type: int)
-1 key (type: int)
+Select Operator
+  expressions: key (type: int), value (type: string)
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 5 Data size: 35 Basic stats: 
COMPLETE Column stats: NONE
+  Spark HashTable Sink Operator
+keys:
+  0 _col0 (type: int)
+  1 _col0 (type: int)
 Local Work:
   Map Reduce Local Work
 
@@ -134,26 +138,30 @@ STAGE PLANS:
   Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 42 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
-Map Join Operator
-  condition map:
-   Inner Join 0 to 1
-  keys:
-0 key (type: int)
-1 key (type: int)
-  outputColumnNames: _col0, _col1, _col7
-  input vertices:
-0 Map 1
-  Statistics: Num rows: 46 Data size: 404 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-expressions: _col0 (type: int), concat(_col1, _col7) 
(type: string)
-outputColumnNames: _col0, _col1
+Select Operator
+  expressions: key (type: int), value (type: string)
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 42 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
+  Map Join Operator
+condition map:
+ Inner Join 0 to 1
+keys:
+  0 _col0 (type: int)
+  1 _col0 (type: int)
+outputColumnNames: _col0, _col1, _col4
+input vertices:
+  0 Map 1
 Statistics: Num rows: 46 Data size: 404 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: _col0 (type: int)
-  sort order: +
-  Map-reduce partition columns: _col0 (type: int)
+Select Operator
+  expressions: _col0 (type: int), concat(_col1, _col4) 
(type: string)
+  outputColumnNames: _col0, _col1
   Statistics: Num rows: 46 Data size: 404 Basic stats: 
COMPLETE Column stats: NONE
-  value expressions: _col1 (type: string)
+  Reduce Output Operator
+key expressions: _col0 (type: int)
+sort order: +
+Map-reduce partition columns: _col0 (type: int)
+Statistics: Num rows: 46 Data size: 404 Basic 
stats: COMPLETE Column stats: NONE
+value expressions: _col1 (type: string)
 Local Work:
   Map Reduce Local Work
 Reducer 3 
@@ -285,10 +293,14 @@ STAGE PLANS:
   Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-Spark HashTable Sink Operator
-  keys:
-0 key (type: int)
-1 key (type: int)
+Select Operator
+  expressions: key (type: int), value (type: string)
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
+  Spark HashTable Sink Operator
+

[26/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out 
b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out
index 36a032a..3b634be 100644
--- a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out
@@ -788,38 +788,46 @@ STAGE PLANS:
   Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 263 Data size: 6248 Basic stats: 
COMPLETE Column stats: NONE
-Map Join Operator
-  condition map:
-   Inner Join 0 to 1
-  keys:
-0 key (type: string)
-1 key (type: string)
-  input vertices:
-1 Map 3
-  Statistics: Num rows: 289 Data size: 6872 Basic stats: 
COMPLETE Column stats: NONE
-  Group By Operator
-aggregations: count()
-mode: hash
-outputColumnNames: _col0
-Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  sort order: 
+Select Operator
+  expressions: key (type: string)
+  outputColumnNames: _col0
+  Statistics: Num rows: 263 Data size: 6248 Basic stats: 
COMPLETE Column stats: NONE
+  Map Join Operator
+condition map:
+ Inner Join 0 to 1
+keys:
+  0 _col0 (type: string)
+  1 _col0 (type: string)
+input vertices:
+  1 Map 3
+Statistics: Num rows: 289 Data size: 6872 Basic stats: 
COMPLETE Column stats: NONE
+Group By Operator
+  aggregations: count()
+  mode: hash
+  outputColumnNames: _col0
   Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-  value expressions: _col0 (type: bigint)
+  Reduce Output Operator
+sort order: 
+Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+value expressions: _col0 (type: bigint)
 Execution mode: llap
 Map 3 
 Map Operator Tree:
 TableScan
-  alias: p2
+  alias: p1
   Statistics: Num rows: 525 Data size: 12474 Basic stats: 
COMPLETE Column stats: NONE
   Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 263 Data size: 6248 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: key (type: string)
-  sort order: +
-  Map-reduce partition columns: key (type: string)
+Select Operator
+  expressions: key (type: string)
+  outputColumnNames: _col0
   Statistics: Num rows: 263 Data size: 6248 Basic stats: 
COMPLETE Column stats: NONE
+  Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 263 Data size: 6248 Basic stats: 
COMPLETE Column stats: NONE
 Execution mode: llap
 Reducer 2 
 Execution mode: uber
@@ -900,39 +908,47 @@ STAGE PLANS:
   Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 263 Data size: 6248 Basic stats: 
COMPLETE Column stats: NONE
-Map Join Operator
-  condition map:
-   Inner Join 0 to 1
-  keys:
-0 key (type: string)
-1 key (type: string)
-  input vertices:
-1 Map 3
-  Statistics: Num rows: 289 Data size: 6872 Basic stats: 
COMPLETE Column stats: NONE
-  HybridGraceHashJoin: true
-  Group By Operator
-  

[78/91] [abbrv] hive git commit: HIVE-8396 : Hive CliDriver command splitting can be broken when comments are present (Elliot West, reviewed by Sergey Shelukhin)

2015-11-29 Thread xuefu
HIVE-8396 : Hive CliDriver command splitting can be broken when comments are 
present (Elliot West, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0ae374a3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0ae374a3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0ae374a3

Branch: refs/heads/spark
Commit: 0ae374a320d1cae523ba2b434800e97692507db8
Parents: 454c2ca
Author: Sergey Shelukhin 
Authored: Wed Nov 25 15:13:27 2015 -0800
Committer: Sergey Shelukhin 
Committed: Wed Nov 25 15:13:27 2015 -0800

--
 cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java | 3 +++
 1 file changed, 3 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/0ae374a3/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
--
diff --git a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java 
b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
index e04f247..e77b7f1 100644
--- a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
+++ b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
@@ -770,6 +770,9 @@ public class CliDriver {
   if (!prefix.equals("")) {
 prefix += '\n';
   }
+  if (line.trim().startsWith("--")) {
+continue;
+  }
   if (line.trim().endsWith(";") && !line.trim().endsWith("\\;")) {
 line = prefix + line;
 ret = cli.processLine(line, true);
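
A toy re-enactment (hypothetical input, condensed from the CliDriver loop) of
why the new check matters: without it, the ';' inside a full-line comment
would terminate the buffered statement early.

    public class SplitDemo {
      public static void main(String[] args) {
        String[] lines = {
            "SELECT count(*)",
            "-- sanity check; do not remove",  // would split here before the fix
            "FROM src;"
        };
        String prefix = "";
        for (String line : lines) {
          if (!prefix.equals("")) {
            prefix += '\n';
          }
          if (line.trim().startsWith("--")) {
            continue;                          // the fix: skip comment lines
          }
          if (line.trim().endsWith(";") && !line.trim().endsWith("\\;")) {
            System.out.println("execute:\n" + prefix + line);
            prefix = "";
          } else {
            prefix = prefix + line;
          }
        }
      }
    }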



[39/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/correlationoptimizer5.q.out
--
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out 
b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out
index 7f2e19f..d33ca0f 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out
@@ -125,29 +125,37 @@ STAGE PLANS:
 Filter Operator
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-key expressions: key (type: int)
-sort order: +
-Map-reduce partition columns: key (type: int)
+  Select Operator
+expressions: key (type: int)
+outputColumnNames: _col0
 Statistics: Num rows: 727 Data size: 2908 Basic stats: 
COMPLETE Column stats: NONE
+Reduce Output Operator
+  key expressions: _col0 (type: int)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: int)
+  Statistics: Num rows: 727 Data size: 2908 Basic stats: 
COMPLETE Column stats: NONE
   TableScan
 alias: y
 Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE 
Column stats: NONE
 Filter Operator
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 724 Data size: 2897 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-key expressions: key (type: int)
-sort order: +
-Map-reduce partition columns: key (type: int)
+  Select Operator
+expressions: key (type: int)
+outputColumnNames: _col0
 Statistics: Num rows: 724 Data size: 2897 Basic stats: 
COMPLETE Column stats: NONE
+Reduce Output Operator
+  key expressions: _col0 (type: int)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: int)
+  Statistics: Num rows: 724 Data size: 2897 Basic stats: 
COMPLETE Column stats: NONE
   Reduce Operator Tree:
 Join Operator
   condition map:
Inner Join 0 to 1
   keys:
-0 key (type: int)
-1 key (type: int)
+0 _col0 (type: int)
+1 _col0 (type: int)
   outputColumnNames: _col0
   Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE 
Column stats: NONE
   File Output Operator
@@ -180,10 +188,10 @@ STAGE PLANS:
   keys:
 0 _col0 (type: int)
 1 _col0 (type: int)
-  outputColumnNames: _col0, _col3
+  outputColumnNames: _col0, _col2
   Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
-expressions: _col0 (type: int), _col3 (type: string)
+expressions: _col0 (type: int), _col2 (type: string)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE 
Column stats: NONE
 File Output Operator
@@ -217,34 +225,42 @@ STAGE PLANS:
 Filter Operator
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-key expressions: key (type: int)
-sort order: +
-Map-reduce partition columns: key (type: int)
+  Select Operator
+expressions: key (type: int)
+outputColumnNames: _col0
 Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE 
Column stats: NONE
+Reduce Output Operator
+  key expressions: _col0 (type: int)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: int)
+  Statistics: Num rows: 27 Data size: 108 Basic stats: 
COMPLETE Column stats: NONE
   TableScan
 alias: n
 Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE 
Column stats: NONE
 Filter Operator
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-key expressions: key (type: int)
-sort order: +
-Map-reduce partition columns: key (type: int)
+  Select Operator
+expressions: key (type: int), 

[19/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/ppd_join5.q.out
--
diff --git a/ql/src/test/results/clientpositive/ppd_join5.q.out 
b/ql/src/test/results/clientpositive/ppd_join5.q.out
index 0807559..1b46ed5 100644
--- a/ql/src/test/results/clientpositive/ppd_join5.q.out
+++ b/ql/src/test/results/clientpositive/ppd_join5.q.out
@@ -32,7 +32,7 @@ POSTHOOK: Lineage: t1.id1 SIMPLE []
 POSTHOOK: Lineage: t1.id2 SIMPLE []
 POSTHOOK: Lineage: t2.d SIMPLE []
 POSTHOOK: Lineage: t2.id SIMPLE []
-Warning: Shuffle Join JOIN[10][tables = [a, b, c]] in Stage 'Stage-2:MAPRED' 
is a cross product
+Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 
'Stage-2:MAPRED' is a cross product
 PREHOOK: query: explain
 select a.*,b.d d1,c.d d2 from
   t1 a join t2 b on (a.id1 = b.id)
@@ -58,31 +58,39 @@ STAGE PLANS:
 Filter Operator
   predicate: (id1 is not null and id2 is not null) (type: boolean)
   Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-key expressions: id1 (type: string), id2 (type: string)
-sort order: ++
-Map-reduce partition columns: id1 (type: string), id2 (type: 
string)
+  Select Operator
+expressions: id1 (type: string), id2 (type: string)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
+Reduce Output Operator
+  key expressions: _col0 (type: string), _col1 (type: string)
+  sort order: ++
+  Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
+  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
   TableScan
 alias: b
 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column 
stats: NONE
 Filter Operator
-  predicate: (id is not null and (d <= 1)) (type: boolean)
+  predicate: ((d <= 1) and id is not null) (type: boolean)
   Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-key expressions: id (type: string), id (type: string)
-sort order: ++
-Map-reduce partition columns: id (type: string), id (type: 
string)
+  Select Operator
+expressions: id (type: string), d (type: int)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
-value expressions: d (type: int)
+Reduce Output Operator
+  key expressions: _col0 (type: string), _col0 (type: string)
+  sort order: ++
+  Map-reduce partition columns: _col0 (type: string), _col0 
(type: string)
+  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
+  value expressions: _col1 (type: int)
   Reduce Operator Tree:
 Join Operator
   condition map:
Inner Join 0 to 1
   keys:
-0 id1 (type: string), id2 (type: string)
-1 id (type: string), id (type: string)
-  outputColumnNames: _col0, _col1, _col6
+0 _col0 (type: string), _col1 (type: string)
+1 _col0 (type: string), _col0 (type: string)
+  outputColumnNames: _col0, _col1, _col3
   Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column 
stats: NONE
   File Output Operator
 compressed: false
@@ -98,17 +106,21 @@ STAGE PLANS:
 Reduce Output Operator
   sort order: 
   Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
-  value expressions: _col0 (type: string), _col1 (type: string), 
_col6 (type: int)
+  value expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: int)
   TableScan
-alias: c
+alias: b
 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column 
stats: NONE
 Filter Operator
   predicate: (d <= 1) (type: boolean)
   Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-sort order: 
+  Select Operator
+expressions: d (type: int)
+outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
-value expressions: d (type: int)
+Reduce Output Operator
+  sort 

[49/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out
--
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out 
b/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out
index f1aadef..85a685b 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out
@@ -157,23 +157,27 @@ STAGE PLANS:
   isSamplingPred: false
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE 
Column stats: NONE
-  Sorted Merge Bucket Map Join Operator
-condition map:
- Inner Join 0 to 1
-keys:
-  0 key (type: string)
-  1 key (type: string)
-Position of Big Table: 1
-BucketMapJoin: true
-Group By Operator
-  aggregations: count()
-  mode: hash
-  outputColumnNames: _col0
-  Reduce Output Operator
-sort order: 
-tag: -1
-value expressions: _col0 (type: bigint)
-auto parallelism: false
+  Select Operator
+expressions: key (type: string)
+outputColumnNames: _col0
+Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE 
Column stats: NONE
+Sorted Merge Bucket Map Join Operator
+  condition map:
+   Inner Join 0 to 1
+  keys:
+0 _col0 (type: string)
+1 _col0 (type: string)
+  Position of Big Table: 1
+  BucketMapJoin: true
+  Group By Operator
+aggregations: count()
+mode: hash
+outputColumnNames: _col0
+Reduce Output Operator
+  sort order: 
+  tag: -1
+  value expressions: _col0 (type: bigint)
+  auto parallelism: false
   Path -> Alias:
  A masked pattern was here 
   Path -> Partition:
@@ -274,8 +278,8 @@ STAGE PLANS:
   name: default.bucket_big
 name: default.bucket_big
   Truncated Path -> Alias:
-/bucket_big/ds=2008-04-08 [b]
-/bucket_big/ds=2008-04-09 [b]
+/bucket_big/ds=2008-04-08 [$hdt$_1:b]
+/bucket_big/ds=2008-04-09 [$hdt$_1:b]
   Needs Tagging: false
   Reduce Operator Tree:
 Group By Operator
@@ -379,23 +383,27 @@ STAGE PLANS:
   isSamplingPred: false
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE 
Column stats: NONE
-  Sorted Merge Bucket Map Join Operator
-condition map:
- Inner Join 0 to 1
-keys:
-  0 key (type: string)
-  1 key (type: string)
-Position of Big Table: 0
-BucketMapJoin: true
-Group By Operator
-  aggregations: count()
-  mode: hash
-  outputColumnNames: _col0
-  Reduce Output Operator
-sort order: 
-tag: -1
-value expressions: _col0 (type: bigint)
-auto parallelism: false
+  Select Operator
+expressions: key (type: string)
+outputColumnNames: _col0
+Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE 
Column stats: NONE
+Sorted Merge Bucket Map Join Operator
+  condition map:
+   Inner Join 0 to 1
+  keys:
+0 _col0 (type: string)
+1 _col0 (type: string)
+  Position of Big Table: 0
+  BucketMapJoin: true
+  Group By Operator
+aggregations: count()
+mode: hash
+outputColumnNames: _col0
+Reduce Output Operator
+  sort order: 
+  tag: -1
+  value expressions: _col0 (type: bigint)
+  auto parallelism: false
   Path -> Alias:
  A masked pattern was here 
   Path -> Partition:
@@ -496,8 +504,8 @@ STAGE PLANS:
   name: default.bucket_big
 name: default.bucket_big
   Truncated Path -> Alias:
-/bucket_big/ds=2008-04-08 [a]
-/bucket_big/ds=2008-04-09 [a]
+/bucket_big/ds=2008-04-08 [$hdt$_0:a]
+

[65/91] [abbrv] hive git commit: HIVE-12331 : Remove hive.enforce.bucketing & hive.enforce.sorting configs (Ashutosh Chauhan via Jason Dere)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out 
b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out
deleted file mode 100644
index b1dfd7c..000
--- 
a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out
+++ /dev/null
@@ -1,553 +0,0 @@
-PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S)
-create table tst1(key string, value string) partitioned by (ds string) 
clustered by (key) into 10 buckets
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S)
-create table tst1(key string, value string) partitioned by (ds string) 
clustered by (key) into 10 buckets
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@tst1
-PREHOOK: query: alter table tst1 clustered by (key) into 8 buckets
-PREHOOK: type: ALTERTABLE_CLUSTER_SORT
-PREHOOK: Input: default@tst1
-PREHOOK: Output: default@tst1
-POSTHOOK: query: alter table tst1 clustered by (key) into 8 buckets
-POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
-POSTHOOK: Input: default@tst1
-POSTHOOK: Output: default@tst1
-PREHOOK: query: describe formatted tst1
-PREHOOK: type: DESCTABLE
-PREHOOK: Input: default@tst1
-POSTHOOK: query: describe formatted tst1
-POSTHOOK: type: DESCTABLE
-POSTHOOK: Input: default@tst1
-# col_name              data_type            comment
-
-key                     string
-value                   string
-
-# Partition Information
-# col_name              data_type            comment
-
-ds                      string
-
-# Detailed Table Information
-Database:  default  
- A masked pattern was here 
-Retention: 0
- A masked pattern was here 
-Table Type:             MANAGED_TABLE
-Table Parameters:   
- A masked pattern was here 
-
-# Storage Information   
-SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe  
 
-InputFormat:   org.apache.hadoop.mapred.TextInputFormat 
-OutputFormat:  
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat   
-Compressed:             No
-Num Buckets:            8
-Bucket Columns:         [key]
-Sort Columns:           []
-Storage Desc Params:
-   serialization.format    1
-PREHOOK: query: insert overwrite table tst1 partition (ds='1') select key, 
value from src
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@tst1@ds=1
-POSTHOOK: query: insert overwrite table tst1 partition (ds='1') select key, 
value from src
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@tst1@ds=1
-POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE 
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: describe formatted tst1 partition (ds = '1')
-PREHOOK: type: DESCTABLE
-PREHOOK: Input: default@tst1
-POSTHOOK: query: describe formatted tst1 partition (ds = '1')
-POSTHOOK: type: DESCTABLE
-POSTHOOK: Input: default@tst1
-# col_name              data_type            comment
-
-key                     string
-value                   string
-
-# Partition Information
-# col_name              data_type            comment
-
-ds                      string
-
-# Detailed Partition Information
-Partition Value:   [1]  
-Database:  default  
-Table: tst1 
- A masked pattern was here 
-Partition Parameters:   
-   COLUMN_STATS_ACCURATE   true
-   numFiles                1
-   numRows                 500
-   rawDataSize             5312
-   totalSize               5812
- A masked pattern was here 
-
-# Storage Information   
-SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe  
 
-InputFormat:   

[24/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
 
b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
index e9192a3..10b4168 100644
--- 
a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
@@ -210,41 +210,49 @@ STAGE PLANS:
   alias: srcpart
   filterExpr: ds is not null (type: boolean)
   Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
-  Reduce Output Operator
-key expressions: ds (type: string)
-sort order: +
-Map-reduce partition columns: ds (type: string)
+  Select Operator
+expressions: ds (type: string)
+outputColumnNames: _col0
 Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
+Reduce Output Operator
+  key expressions: _col0 (type: string)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: string)
+  Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
 Execution mode: llap
 Map 4 
 Map Operator Tree:
 TableScan
   alias: srcpart_date
-  filterExpr: (ds is not null and (date = '2008-04-08')) 
(type: boolean)
+  filterExpr: ((date = '2008-04-08') and ds is not null) 
(type: boolean)
   Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE 
Column stats: NONE
   Filter Operator
-predicate: (ds is not null and (date = '2008-04-08')) 
(type: boolean)
+predicate: ((date = '2008-04-08') and ds is not null) 
(type: boolean)
 Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: ds (type: string)
-  sort order: +
-  Map-reduce partition columns: ds (type: string)
-  Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
   expressions: ds (type: string)
   outputColumnNames: _col0
   Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
-  Group By Operator
-keys: _col0 (type: string)
-mode: hash
+  Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+  Select Operator
+expressions: _col0 (type: string)
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
-Dynamic Partitioning Event Operator
-  Target Input: srcpart
-  Partition key expr: ds
+Group By Operator
+  keys: _col0 (type: string)
+  mode: hash
+  outputColumnNames: _col0
   Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
-  Target column: ds
-  Target Vertex: Map 1
+  Dynamic Partitioning Event Operator
+Target Input: srcpart
+Partition key expr: ds
+Statistics: Num rows: 1 Data size: 188 Basic 
stats: COMPLETE Column stats: NONE
+Target column: ds
+Target Vertex: Map 1
 Execution mode: vectorized, llap
 Reducer 2 
 Execution mode: llap
@@ -253,8 +261,8 @@ STAGE PLANS:
 condition map:
  Inner Join 0 to 1
 keys:
-  0 ds (type: string)
-  1 ds (type: string)
+  0 _col0 (type: string)
+  1 _col0 (type: string)
 Statistics: Num rows: 2200 Data size: 

[14/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/skewjoinopt4.q.out
--
diff --git a/ql/src/test/results/clientpositive/skewjoinopt4.q.out 
b/ql/src/test/results/clientpositive/skewjoinopt4.q.out
index 1d2a5a4..28fb7df 100644
--- a/ql/src/test/results/clientpositive/skewjoinopt4.q.out
+++ b/ql/src/test/results/clientpositive/skewjoinopt4.q.out
@@ -62,43 +62,47 @@ STAGE PLANS:
 Filter Operator
   predicate: (key is not null and (key = '2')) (type: boolean)
   Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-key expressions: key (type: string)
-sort order: +
-Map-reduce partition columns: key (type: string)
+  Select Operator
+expressions: key (type: string), val (type: string)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE 
Column stats: NONE
-value expressions: val (type: string)
+Reduce Output Operator
+  key expressions: _col0 (type: string)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: string)
+  Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE 
Column stats: NONE
+  value expressions: _col1 (type: string)
   TableScan
 alias: b
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column 
stats: NONE
 Filter Operator
   predicate: (key is not null and (key = '2')) (type: boolean)
   Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-key expressions: key (type: string)
-sort order: +
-Map-reduce partition columns: key (type: string)
+  Select Operator
+expressions: key (type: string), val (type: string)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE 
Column stats: NONE
-value expressions: val (type: string)
+Reduce Output Operator
+  key expressions: _col0 (type: string)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: string)
+  Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE 
Column stats: NONE
+  value expressions: _col1 (type: string)
   Reduce Operator Tree:
 Join Operator
   condition map:
Inner Join 0 to 1
   keys:
-0 key (type: string)
-1 key (type: string)
-  outputColumnNames: _col0, _col1, _col5, _col6
+0 _col0 (type: string)
+1 _col0 (type: string)
+  outputColumnNames: _col0, _col1, _col2, _col3
   Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column 
stats: NONE
-  Select Operator
-expressions: _col0 (type: string), _col1 (type: string), _col5 
(type: string), _col6 (type: string)
-outputColumnNames: _col0, _col1, _col2, _col3
-Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column 
stats: NONE
-File Output Operator
-  compressed: false
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+  File Output Operator
+compressed: false
+table:
+input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
 Map Reduce
@@ -133,40 +137,44 @@ STAGE PLANS:
 Filter Operator
   predicate: (key is not null and (not (key = '2'))) (type: 
boolean)
   Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-key expressions: key (type: string)
-sort order: +
-Map-reduce partition columns: key (type: string)
+  Select Operator
+expressions: key (type: string), val (type: string)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE 
Column stats: NONE
-value expressions: val (type: string)
+Reduce Output Operator
+  

[31/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out 
b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
index 1f1bf3d..814c947 100644
--- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
@@ -135,12 +135,16 @@ STAGE PLANS:
   Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: key (type: int)
-  sort order: +
-  Map-reduce partition columns: key (type: int)
+Select Operator
+  expressions: key (type: int), value (type: string)
+  outputColumnNames: _col0, _col1
   Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
-  value expressions: value (type: string)
+  Reduce Output Operator
+key expressions: _col0 (type: int)
+sort order: +
+Map-reduce partition columns: _col0 (type: int)
+Statistics: Num rows: 121 Data size: 1283 Basic stats: 
COMPLETE Column stats: NONE
+value expressions: _col1 (type: string)
 Execution mode: llap
 Map 2 
 Map Operator Tree:
@@ -150,28 +154,32 @@ STAGE PLANS:
   Filter Operator
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
-Map Join Operator
-  condition map:
-   Inner Join 0 to 1
-  keys:
-0 key (type: int)
-1 key (type: int)
-  outputColumnNames: _col0, _col1, _col7
-  input vertices:
-0 Map 1
-  Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-  HybridGraceHashJoin: true
-  Select Operator
-expressions: _col0 (type: int), _col1 (type: string), 
_col7 (type: string)
-outputColumnNames: _col0, _col1, _col2
+Select Operator
+  expressions: key (type: int), value (type: string)
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+  Map Join Operator
+condition map:
+ Inner Join 0 to 1
+keys:
+  0 _col0 (type: int)
+  1 _col0 (type: int)
+outputColumnNames: _col0, _col1, _col3
+input vertices:
+  0 Map 1
 Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-File Output Operator
-  compressed: false
+HybridGraceHashJoin: true
+Select Operator
+  expressions: _col0 (type: int), _col1 (type: 
string), _col3 (type: string)
+  outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 275 Data size: 2921 Basic 
stats: COMPLETE Column stats: NONE
-  table:
-  input format: 
org.apache.hadoop.mapred.TextInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+  File Output Operator
+compressed: false
+Statistics: Num rows: 275 Data size: 2921 Basic 
stats: COMPLETE Column stats: NONE
+table:
+input format: 
org.apache.hadoop.mapred.TextInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: llap
 
   Stage: Stage-0
@@ -229,11 +237,15 @@ STAGE PLANS:
   Filter Operator
 predicate: key 

[45/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out
--
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out 
b/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out
index bbfa756..31a1b29 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out
@@ -68,19 +68,23 @@ STAGE PLANS:
 Filter Operator
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE 
Column stats: NONE
-  Sorted Merge Bucket Map Join Operator
-condition map:
- Inner Join 0 to 1
-keys:
-  0 key (type: int)
-  1 key (type: int)
-Group By Operator
-  aggregations: count()
-  mode: hash
-  outputColumnNames: _col0
-  Reduce Output Operator
-sort order: 
-value expressions: _col0 (type: bigint)
+  Select Operator
+expressions: key (type: int)
+outputColumnNames: _col0
+Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE 
Column stats: NONE
+Sorted Merge Bucket Map Join Operator
+  condition map:
+   Inner Join 0 to 1
+  keys:
+0 _col0 (type: int)
+1 _col0 (type: int)
+  Group By Operator
+aggregations: count()
+mode: hash
+outputColumnNames: _col0
+Reduce Output Operator
+  sort order: 
+  value expressions: _col0 (type: bigint)
   Reduce Operator Tree:
 Group By Operator
   aggregations: count(VALUE._col0)
@@ -144,23 +148,27 @@ STAGE PLANS:
 Filter Operator
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE 
Column stats: NONE
-  Sorted Merge Bucket Map Join Operator
-condition map:
- Inner Join 0 to 1
-keys:
-  0 key (type: int)
-  1 key (type: int)
+  Select Operator
+expressions: key (type: int)
 outputColumnNames: _col0
-Group By Operator
-  aggregations: count()
-  keys: _col0 (type: int)
-  mode: hash
-  outputColumnNames: _col0, _col1
-  Reduce Output Operator
-key expressions: _col0 (type: int)
-sort order: +
-Map-reduce partition columns: _col0 (type: int)
-value expressions: _col1 (type: bigint)
+Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE 
Column stats: NONE
+Sorted Merge Bucket Map Join Operator
+  condition map:
+   Inner Join 0 to 1
+  keys:
+0 _col0 (type: int)
+1 _col0 (type: int)
+  outputColumnNames: _col0
+  Group By Operator
+aggregations: count()
+keys: _col0 (type: int)
+mode: hash
+outputColumnNames: _col0, _col1
+Reduce Output Operator
+  key expressions: _col0 (type: int)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: int)
+  value expressions: _col1 (type: bigint)
   Reduce Operator Tree:
 Group By Operator
   aggregations: count(VALUE._col0)
@@ -241,29 +249,30 @@ STAGE PLANS:
 Filter Operator
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE 
Column stats: NONE
-  Sorted Merge Bucket Map Join Operator
-condition map:
- Inner Join 0 to 1
-keys:
-  0 key (type: int)
-  1 key (type: int)
+  Select Operator
+expressions: key (type: int)
 outputColumnNames: _col0
-Group By Operator
-  aggregations: count()
-  keys: _col0 (type: int)
-  mode: hash
-  outputColumnNames: _col0, _col1
-  Reduce Output Operator
-key expressions: _col0 (type: int)
-sort order: +
-Map-reduce partition 

[38/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/cross_product_check_2.q.out
--
diff --git a/ql/src/test/results/clientpositive/cross_product_check_2.q.out 
b/ql/src/test/results/clientpositive/cross_product_check_2.q.out
index 6910b40..57d1498 100644
--- a/ql/src/test/results/clientpositive/cross_product_check_2.q.out
+++ b/ql/src/test/results/clientpositive/cross_product_check_2.q.out
@@ -93,7 +93,7 @@ STAGE PLANS:
   Processor Tree:
 ListSink
 
-Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-5:MAPRED' is a cross 
product
+Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-5:MAPRED' is a cross 
product
 PREHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A
@@ -107,60 +107,68 @@ STAGE PLANS:
   Stage: Stage-7
 Map Reduce Local Work
   Alias -> Map Local Tables:
-a 
+$hdt$_0:d1 
   Fetch Operator
 limit: -1
-d1 
+$hdt$_2:a 
   Fetch Operator
 limit: -1
   Alias -> Map Local Operator Tree:
-a 
-  TableScan
-alias: a
-Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-HashTable Sink Operator
-  keys:
-0 
-1 
-d1 
+$hdt$_0:d1 
   TableScan
 alias: d1
 Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE 
Column stats: NONE
 Filter Operator
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 5 Data size: 48 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 5 Data size: 48 Basic stats: COMPLETE 
Column stats: NONE
+HashTable Sink Operator
+  keys:
+0 _col0 (type: string)
+1 _col0 (type: string)
+$hdt$_2:a 
+  TableScan
+alias: a
+Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+Select Operator
+  expressions: key (type: string), value (type: string)
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
   HashTable Sink Operator
 keys:
-  0 key (type: string)
-  1 key (type: string)
+  0 
+  1 
 
   Stage: Stage-5
 Map Reduce
   Map Operator Tree:
   TableScan
-alias: d2
+alias: d1
 Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE 
Column stats: NONE
 Filter Operator
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 5 Data size: 48 Basic stats: COMPLETE 
Column stats: NONE
-  Map Join Operator
-condition map:
- Inner Join 0 to 1
-keys:
-  0 key (type: string)
-  1 key (type: string)
-outputColumnNames: _col0, _col1, _col5, _col6
-Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 5 Data size: 48 Basic stats: COMPLETE 
Column stats: NONE
 Map Join Operator
   condition map:
Inner Join 0 to 1
   keys:
-0 
-1 
-  outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
-  Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-expressions: _col0 (type: string), _col1 (type: string), 
_col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 
(type: string)
+0 _col0 (type: string)
+1 _col0 (type: string)
+  outputColumnNames: _col0, _col1, _col2, _col3
+  Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE 
Column stats: NONE
+  Map Join Operator
+condition map:
+ Inner Join 0 to 1
+keys:
+  0 
+  1 
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
 

[68/91] [abbrv] hive git commit: HIVE-12329: Turn on limit pushdown optimization by default (Ashutosh Chauhan via Prasanth J)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out
--
diff --git 
a/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out 
b/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out
index 12920d2..bcbdf06 100644
--- a/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out
@@ -161,6 +161,7 @@ STAGE PLANS:
   key expressions: _col0 (type: int)
   sort order: +
   Statistics: Num rows: 524 Data size: 155436 Basic stats: 
COMPLETE Column stats: NONE
+  TopN Hash Memory Usage: 0.1
   value expressions: _col1 (type: double), _col2 (type: 
double), _col3 (type: decimal(14,4))
 Reducer 3 
 Execution mode: vectorized
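
The new TopN Hash Memory Usage: 0.1 lines in these plans reflect HIVE-12329: with limit pushdown on by default, a Reduce Output Operator feeding an ORDER BY ... LIMIT n keeps only the current top n keys in a bounded in-memory structure (capped at the fraction of memory given by hive.limit.pushdown.memory.usage, 0.1 here) instead of shuffling every row to the reducer. A minimal sketch of the idea in plain Java using a bounded heap (illustrative, not Hive's implementation):

    import java.util.ArrayList;
    import java.util.Comparator;
    import java.util.List;
    import java.util.PriorityQueue;

    public class TopNSketch {
      // Keep the n smallest keys seen so far, as a map-side filter for
      // "... ORDER BY k LIMIT n": everything else can be dropped early.
      static List<Integer> topN(Iterable<Integer> rows, int n) {
        // Max-heap of size <= n: the root is the worst key currently kept.
        PriorityQueue<Integer> heap = new PriorityQueue<>(Comparator.reverseOrder());
        for (int k : rows) {
          if (heap.size() < n) {
            heap.add(k);
          } else if (k < heap.peek()) {
            heap.poll();   // evict the current worst key
            heap.add(k);
          }
        }
        List<Integer> out = new ArrayList<>(heap);
        out.sort(null);    // natural order, as the reducer would emit
        return out;
      }

      public static void main(String[] args) {
        System.out.println(topN(List.of(7, 3, 9, 1, 5, 8), 3)); // [1, 3, 5]
      }
    }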

http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/tez/vector_char_2.q.out
--
diff --git a/ql/src/test/results/clientpositive/tez/vector_char_2.q.out 
b/ql/src/test/results/clientpositive/tez/vector_char_2.q.out
index 8545608..f88ee91 100644
--- a/ql/src/test/results/clientpositive/tez/vector_char_2.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_char_2.q.out
@@ -106,6 +106,7 @@ STAGE PLANS:
   key expressions: _col0 (type: char(20))
   sort order: +
   Statistics: Num rows: 250 Data size: 49500 Basic stats: 
COMPLETE Column stats: NONE
+  TopN Hash Memory Usage: 0.1
   value expressions: _col1 (type: bigint), _col2 (type: bigint)
 Reducer 3 
 Execution mode: vectorized
@@ -234,6 +235,7 @@ STAGE PLANS:
   key expressions: _col0 (type: char(20))
   sort order: -
   Statistics: Num rows: 250 Data size: 49500 Basic stats: 
COMPLETE Column stats: NONE
+  TopN Hash Memory Usage: 0.1
   value expressions: _col1 (type: bigint), _col2 (type: bigint)
 Reducer 3 
 Execution mode: vectorized

http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/tez/vector_char_simple.q.out
--
diff --git a/ql/src/test/results/clientpositive/tez/vector_char_simple.q.out 
b/ql/src/test/results/clientpositive/tez/vector_char_simple.q.out
index be38775..617620c 100644
--- a/ql/src/test/results/clientpositive/tez/vector_char_simple.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_char_simple.q.out
@@ -79,6 +79,7 @@ STAGE PLANS:
   key expressions: _col0 (type: char(10))
   sort order: +
   Statistics: Num rows: 500 Data size: 99000 Basic stats: 
COMPLETE Column stats: NONE
+  TopN Hash Memory Usage: 0.1
   value expressions: _col1 (type: char(20))
 Execution mode: vectorized
 Reducer 2 
@@ -179,6 +180,7 @@ STAGE PLANS:
   key expressions: _col0 (type: char(10))
   sort order: -
   Statistics: Num rows: 500 Data size: 99000 Basic stats: 
COMPLETE Column stats: NONE
+  TopN Hash Memory Usage: 0.1
   value expressions: _col1 (type: char(20))
 Execution mode: vectorized
 Reducer 2 
@@ -282,6 +284,7 @@ STAGE PLANS:
   Reduce Output Operator
 sort order: 
 Statistics: Num rows: 10 Data size: 2150 Basic stats: 
COMPLETE Column stats: NONE
+TopN Hash Memory Usage: 0.1
 value expressions: _col0 (type: int)
 Execution mode: vectorized
 Reducer 2 

http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out
--
diff --git a/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out 
b/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out
index c492113..1142485 100644
--- a/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out
@@ -41,6 +41,7 @@ STAGE PLANS:
 key expressions: null (type: double), _col1 (type: 
string), _col2 (type: int), _col3 (type: float), _col4 (type: smallint), _col5 
(type: string)
 sort order: ++
 Statistics: Num rows: 6144 Data size: 1320982 Basic 
stats: COMPLETE Column stats: NONE
+TopN Hash Memory Usage: 0.1
 Execution mode: 

[84/91] [abbrv] hive git commit: HIVE-12465: Hive might produce wrong results when (outer) joins are merged (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
HIVE-12465: Hive might produce wrong results when (outer) joins are merged 
(Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/79847387
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/79847387
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/79847387

Branch: refs/heads/spark
Commit: 79847387699b803506ecd8b03ecc8790ee229751
Parents: f1ac5a3
Author: Jesus Camacho Rodriguez 
Authored: Tue Nov 24 17:20:05 2015 +0100
Committer: Jesus Camacho Rodriguez 
Committed: Fri Nov 27 10:10:46 2015 +0100

--
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   2 +-
 ql/src/test/queries/clientpositive/mergejoin.q  |  12 +
 .../test/results/clientpositive/mergejoin.q.out | 548 +++
 .../results/clientpositive/tez/mergejoin.q.out  | 548 +++
 4 files changed, 1109 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/79847387/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 1b7873d..0ff6001 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -2265,7 +2265,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
   if (rightCondAl1.size() != 0) {
 QBJoinTree leftTree = joinTree.getJoinSrc();
 List<String> leftTreeLeftSrc = new ArrayList<String>();
-if (leftTree != null) {
+if (leftTree != null && leftTree.getNoOuterJoin()) {
   String leftTreeRightSource = leftTree.getRightAliases() != null &&
   leftTree.getRightAliases().length > 0 ?
   leftTree.getRightAliases()[0] : null;
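
The one-line change above is the whole fix: a candidate join may only be merged into the left subtree when that subtree contains no outer joins (getNoOuterJoin()), because merging join conditions across an outer join changes which null-padded rows survive. A condensed, self-contained model of the guard in plain Java (the JoinTree class below is illustrative, standing in for Hive's QBJoinTree):

    public class MergeGuardSketch {
      static class JoinTree {
        JoinTree joinSrc;            // left subtree, if any
        boolean noOuterJoin = true;  // false once the subtree holds an outer join
      }

      // HIVE-12465: before the fix only the null check was performed, so
      // conditions could be merged into a subtree containing an outer join.
      static boolean mayMergeIntoLeft(JoinTree joinTree) {
        JoinTree leftTree = joinTree.joinSrc;
        return leftTree != null && leftTree.noOuterJoin;
      }

      public static void main(String[] args) {
        JoinTree left = new JoinTree();
        left.noOuterJoin = false;    // subtree contains a FULL OUTER JOIN
        JoinTree top = new JoinTree();
        top.joinSrc = left;
        System.out.println(mayMergeIntoLeft(top)); // false: keep the joins separate
      }
    }

The new mergejoin.q cases below exercise exactly this shape, mixing a full outer join with inner joins, with CBO disabled for the second pair of queries.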

http://git-wip-us.apache.org/repos/asf/hive/blob/79847387/ql/src/test/queries/clientpositive/mergejoin.q
--
diff --git a/ql/src/test/queries/clientpositive/mergejoin.q 
b/ql/src/test/queries/clientpositive/mergejoin.q
index 6cd3929..82e1c93 100644
--- a/ql/src/test/queries/clientpositive/mergejoin.q
+++ b/ql/src/test/queries/clientpositive/mergejoin.q
@@ -132,3 +132,15 @@ select * from
 (select * from tab where tab.key = 0)a
 join
 (select * from tab_part where tab_part.key = 98)b on a.key = b.key full outer 
join tab_part c on b.key = c.key;
+
+set hive.cbo.enable = false;
+
+select * from
+(select * from tab where tab.key = 0)a
+full outer join
+(select * from tab_part where tab_part.key = 98)b join tab_part c on a.key = 
b.key and b.key = c.key;
+
+select * from
+(select * from tab where tab.key = 0)a
+join
+(select * from tab_part where tab_part.key = 98)b full outer join tab_part c 
on a.key = b.key and b.key = c.key;

http://git-wip-us.apache.org/repos/asf/hive/blob/79847387/ql/src/test/results/clientpositive/mergejoin.q.out
--
diff --git a/ql/src/test/results/clientpositive/mergejoin.q.out 
b/ql/src/test/results/clientpositive/mergejoin.q.out
index 65f5ef5..e4a9e5b 100644
--- a/ql/src/test/results/clientpositive/mergejoin.q.out
+++ b/ql/src/test/results/clientpositive/mergejoin.q.out
@@ -3787,3 +3787,551 @@ NULLNULLNULLNULLNULLNULL97  
val_97  2008-04-08
 NULL   NULLNULLNULLNULLNULL97  val_97  2008-04-08
 NULL   NULLNULLNULLNULLNULL98  val_98  2008-04-08
 NULL   NULLNULLNULLNULLNULL98  val_98  2008-04-08
+Warning: Shuffle Join JOIN[9][tables = [a, b]] in Stage 'Stage-1:MAPRED' is a 
cross product
+PREHOOK: query: select * from
+(select * from tab where tab.key = 0)a
+full outer join
+(select * from tab_part where tab_part.key = 98)b join tab_part c on a.key = 
b.key and b.key = c.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab
+PREHOOK: Input: default@tab@ds=2008-04-08
+PREHOOK: Input: default@tab_part
+PREHOOK: Input: default@tab_part@ds=2008-04-08
+ A masked pattern was here 
+POSTHOOK: query: select * from
+(select * from tab where tab.key = 0)a
+full outer join
+(select * from tab_part where tab_part.key = 98)b join tab_part c on a.key = 
b.key and b.key = c.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab
+POSTHOOK: Input: default@tab@ds=2008-04-08
+POSTHOOK: Input: default@tab_part
+POSTHOOK: Input: default@tab_part@ds=2008-04-08
+ A masked pattern was here 
+Warning: Shuffle Join JOIN[9][tables = [a, b]] in Stage 'Stage-1:MAPRED' is a 
cross product
+PREHOOK: query: select * 

[62/91] [abbrv] hive git commit: HIVE-12175: Upgrade Kryo version to 3.0.x (Prasanth Jayachandran reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
HIVE-12175: Upgrade Kryo version to 3.0.x (Prasanth Jayachandran reviewed by 
Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b7281ce6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b7281ce6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b7281ce6

Branch: refs/heads/spark
Commit: b7281ce6a61bcfbd398fd691cddc38c4f1a61f64
Parents: c6a835c
Author: Prasanth Jayachandran 
Authored: Tue Nov 24 12:43:46 2015 -0600
Committer: Prasanth Jayachandran 
Committed: Tue Nov 24 12:43:46 2015 -0600

--
 itests/qtest-accumulo/pom.xml   |   2 +-
 pom.xml |   6 +-
 ql/pom.xml  |  36 +++--
 .../apache/hadoop/hive/ql/exec/Utilities.java   | 145 +--
 .../org/apache/hadoop/hive/ql/plan/MapWork.java |  15 --
 .../apache/hadoop/hive/ql/plan/ReduceWork.java  |   5 -
 spark-client/pom.xml|  28 ++--
 .../hive/spark/client/rpc/KryoMessageCodec.java |  11 +-
 8 files changed, 185 insertions(+), 63 deletions(-)
--
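
Two details in the diff below are worth calling out: Kryo 3 is published under new coordinates (com.esotericsoftware:kryo-shaded, plus its minlog and objenesis dependencies), and ql's shaded jar relocates both com.esotericsoftware and org.objenesis under org.apache.hive.* while excluding Spark's own Kryo, since Spark pulls an older Kryo onto the same classpath. For basic use the serialization API is unchanged; a minimal round trip with the Kryo 3 API, as a sketch (unshaded package names shown; inside the shaded jar they become org.apache.hive.com.esotericsoftware...):

    import com.esotericsoftware.kryo.Kryo;
    import com.esotericsoftware.kryo.io.Input;
    import com.esotericsoftware.kryo.io.Output;
    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;

    public class KryoRoundTrip {
      public static void main(String[] args) {
        Kryo kryo = new Kryo();                       // com.esotericsoftware:kryo-shaded 3.0.x
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (Output out = new Output(bytes)) {
          kryo.writeObject(out, "hello");             // serialize
        }
        try (Input in = new Input(new ByteArrayInputStream(bytes.toByteArray()))) {
          System.out.println(kryo.readObject(in, String.class)); // prints "hello"
        }
      }
    }
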


http://git-wip-us.apache.org/repos/asf/hive/blob/b7281ce6/itests/qtest-accumulo/pom.xml
--
diff --git a/itests/qtest-accumulo/pom.xml b/itests/qtest-accumulo/pom.xml
index 7403a15..f7325dc 100644
--- a/itests/qtest-accumulo/pom.xml
+++ b/itests/qtest-accumulo/pom.xml
@@ -123,7 +123,7 @@
 
 
-      <groupId>com.esotericsoftware.kryo</groupId>
+      <groupId>com.esotericsoftware</groupId>
       <artifactId>kryo</artifactId>
       <version>${kryo.version}</version>
       <scope>test</scope>

http://git-wip-us.apache.org/repos/asf/hive/blob/b7281ce6/pom.xml
--
diff --git a/pom.xml b/pom.xml
index c6df4a5..c38c10f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -144,7 +144,7 @@
     <jodd.version>3.5.2</jodd.version>
     <json.version>20090211</json.version>
     <junit.version>4.11</junit.version>
-    <kryo.version>2.22</kryo.version>
+    <kryo.version>3.0.3</kryo.version>
     <libfb303.version>0.9.3</libfb303.version>
     <libthrift.version>0.9.3</libthrift.version>
     <log4j2.version>2.4</log4j2.version>
@@ -228,8 +228,8 @@
 
   
   
-        <groupId>com.esotericsoftware.kryo</groupId>
-        <artifactId>kryo</artifactId>
+        <groupId>com.esotericsoftware</groupId>
+        <artifactId>kryo-shaded</artifactId>
         <version>${kryo.version}</version>
   
   

http://git-wip-us.apache.org/repos/asf/hive/blob/b7281ce6/ql/pom.xml
--
diff --git a/ql/pom.xml b/ql/pom.xml
index 9420a62..d893099 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -72,8 +72,8 @@
 
 
 
-      <groupId>com.esotericsoftware.kryo</groupId>
-      <artifactId>kryo</artifactId>
+      <groupId>com.esotericsoftware</groupId>
+      <artifactId>kryo-shaded</artifactId>
       <version>${kryo.version}</version>
 
 
@@ -594,16 +594,20 @@
       <artifactId>spark-core_${scala.binary.version}</artifactId>
       <version>${spark.version}</version>
       <optional>true</optional>
-      <exclusions>
-        <exclusion>
-          <groupId>org.slf4j</groupId>
-          <artifactId>slf4j-log4j12</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>commmons-logging</groupId>
-          <artifactId>commons-logging</artifactId>
-        </exclusion>
-      </exclusions>
+      <exclusions>
+        <exclusion>
+          <groupId>com.esotericsoftware.kryo</groupId>
+          <artifactId>kryo</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.slf4j</groupId>
+          <artifactId>slf4j-log4j12</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commmons-logging</groupId>
+          <artifactId>commons-logging</artifactId>
+        </exclusion>
+      </exclusions>

 
   com.sun.jersey
@@ -746,7 +750,9 @@
                  <include>org.apache.hive:hive-serde</include>
                  <include>org.apache.hive:hive-llap-client</include>
                  <include>org.apache.hive:hive-metastore</include>
-                 <include>com.esotericsoftware.kryo:kryo</include>
+                 <include>com.esotericsoftware:kryo-shaded</include>
+                 <include>com.esotericsoftware:minlog</include>
+                 <include>org.objenesis:objenesis</include>
                  <include>org.apache.parquet:parquet-hadoop-bundle</include>
                  <include>org.apache.thrift:libthrift</include>
                  <include>org.apache.thrift:libfb303</include>
@@ -779,6 +785,10 @@
                  <pattern>com.esotericsoftware</pattern>
                  <shadedPattern>org.apache.hive.com.esotericsoftware</shadedPattern>
                </relocation>
+               <relocation>
+                 <pattern>org.objenesis</pattern>
+                 <shadedPattern>org.apache.hive.org.objenesis</shadedPattern>
+               </relocation>
   
 
   

http://git-wip-us.apache.org/repos/asf/hive/blob/b7281ce6/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 9dbb45a..8b8cf6d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -37,6 +37,8 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.Serializable;
 import java.io.UnsupportedEncodingException;
+import java.lang.reflect.Array;
+import java.lang.reflect.Field;
 import java.net.URI;
 import java.net.URL;
 import java.net.URLClassLoader;
@@ -87,8 +89,6 @@ import org.apache.commons.lang.StringUtils;
 import org.apache.commons.lang.WordUtils;
 import org.apache.commons.lang3.StringEscapeUtils;
 import 

[30/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out 
b/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out
index 72a5d0d..a1addb7 100644
--- a/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out
+++ b/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out
@@ -38,76 +38,79 @@ union all
 select 2 as id from tb2 limit 1) b
 on a.id=b.id
 POSTHOOK: type: QUERY
-Plan not optimized by CBO due to missing statistics. Please check log for more 
details.
+Plan optimized by CBO.
 
 Vertex dependency in root stage
-Map 1 <- Union 2 (CONTAINS)
-Map 5 <- Union 2 (CONTAINS)
-Reducer 3 <- Union 2 (SIMPLE_EDGE)
-Reducer 4 <- Map 6 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Map 3 <- Union 4 (CONTAINS)
+Map 6 <- Union 4 (CONTAINS)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+Reducer 5 <- Union 4 (SIMPLE_EDGE)
 
 Stage-0
Fetch Operator
   limit:-1
   Stage-1
- Reducer 4 llap
- File Output Operator [FS_16]
+ Reducer 2 llap
+ File Output Operator [FS_17]
 compressed:false
 Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
 table:{"input 
format:":"org.apache.hadoop.mapred.TextInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
-Merge Join Operator [MERGEJOIN_20]
+Merge Join Operator [MERGEJOIN_21]
 |  condition map:[{"":"Left Outer Join0 to 1"}]
-|  keys:{"0":"id (type: int)","1":"_col0 (type: int)"}
+|  keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
 |  outputColumnNames:["_col0"]
 |  Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-|<-Map 6 [SIMPLE_EDGE] llap
-|  Reduce Output Operator [RS_12]
-| key expressions:id (type: int)
-| Map-reduce partition columns:id (type: int)
+|<-Map 1 [SIMPLE_EDGE] llap
+|  Reduce Output Operator [RS_13]
+| key expressions:_col0 (type: int)
+| Map-reduce partition columns:_col0 (type: int)
 | sort order:+
 | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-| TableScan [TS_11]
-|alias:a
+| Select Operator [SEL_1]
+|outputColumnNames:["_col0"]
 |Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-|<-Reducer 3 [SIMPLE_EDGE] llap
-   Reduce Output Operator [RS_13]
+|TableScan [TS_0]
+|   alias:a
+|   Statistics:Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE
+|<-Reducer 5 [SIMPLE_EDGE] llap
+   Reduce Output Operator [RS_14]
   key expressions:_col0 (type: int)
   Map-reduce partition columns:_col0 (type: int)
   sort order:+
   Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
-  Limit [LIM_10]
+  Limit [LIM_11]
  Number of rows:1
  Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
- Select Operator [SEL_9]
+ Select Operator [SEL_10]
  |  outputColumnNames:["_col0"]
  |  Statistics:Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: COMPLETE
- |<-Union 2 [SIMPLE_EDGE]
-|<-Map 1 [CONTAINS] llap
-|  Reduce Output Operator [RS_8]
+ |<-Union 4 [SIMPLE_EDGE]
+|<-Map 3 [CONTAINS] llap
+|  Reduce Output Operator [RS_9]
 | sort order:
 | Statistics:Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: COMPLETE
 | value expressions:_col0 (type: int)
-| Limit [LIM_7]
+| Limit [LIM_8]
 |Number of rows:1
 |Statistics:Num rows: 1 Data size: 0 Basic 
stats: PARTIAL Column stats: COMPLETE
-|Select Operator [SEL_1]
+|Select Operator [SEL_3]
 |   outputColumnNames:["_col0"]
 |   Statistics:Num rows: 1 Data size: 0 Basic 
stats: PARTIAL Column stats: NONE
-|   

[40/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/correlationoptimizer13.q.out
--
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer13.q.out 
b/ql/src/test/results/clientpositive/correlationoptimizer13.q.out
index d652d87..8771f1c 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer13.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer13.q.out
@@ -65,11 +65,11 @@ STAGE PLANS:
   Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
 expressions: c3 (type: string), c1 (type: int)
-outputColumnNames: c3, c1
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 171 Data size: 3819 Basic stats: 
COMPLETE Column stats: NONE
 Group By Operator
   aggregations: count(1)
-  keys: c3 (type: string), c1 (type: int)
+  keys: _col0 (type: string), _col1 (type: int)
   mode: hash
   outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 171 Data size: 3819 Basic stats: 
COMPLETE Column stats: NONE
@@ -112,7 +112,7 @@ STAGE PLANS:
   key expressions: _col0 (type: int), _col1 (type: string)
   sort order: ++
   Map-reduce partition columns: _col0 (type: int), _col1 (type: 
string)
-  Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE 
Column stats: NONE
+  Statistics: Num rows: 14 Data size: 312 Basic stats: COMPLETE 
Column stats: NONE
   value expressions: _col2 (type: bigint)
   Reduce Operator Tree:
 Join Operator
@@ -159,26 +159,26 @@ STAGE PLANS:
 Map Reduce
   Map Operator Tree:
   TableScan
-alias: x1
+alias: x
 Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE 
Column stats: NONE
 Filter Operator
-  predicate: (((c2 > 100) and c1 is not null) and c3 is not null) 
(type: boolean)
-  Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE 
Column stats: NONE
+  predicate: c2 > 100) and (c1 < 120)) and c1 is not null) and 
c3 is not null) (type: boolean)
+  Statistics: Num rows: 29 Data size: 647 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
 expressions: c3 (type: string), c1 (type: int)
-outputColumnNames: c3, c1
-Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE 
Column stats: NONE
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 29 Data size: 647 Basic stats: COMPLETE 
Column stats: NONE
 Group By Operator
   aggregations: count(1)
-  keys: c3 (type: string), c1 (type: int)
+  keys: _col0 (type: string), _col1 (type: int)
   mode: hash
   outputColumnNames: _col0, _col1, _col2
-  Statistics: Num rows: 86 Data size: 1921 Basic stats: 
COMPLETE Column stats: NONE
+  Statistics: Num rows: 29 Data size: 647 Basic stats: 
COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: int)
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 
(type: int)
-Statistics: Num rows: 86 Data size: 1921 Basic stats: 
COMPLETE Column stats: NONE
+Statistics: Num rows: 29 Data size: 647 Basic stats: 
COMPLETE Column stats: NONE
 value expressions: _col2 (type: bigint)
   Reduce Operator Tree:
 Group By Operator
@@ -186,11 +186,11 @@ STAGE PLANS:
   keys: KEY._col0 (type: string), KEY._col1 (type: int)
   mode: mergepartial
   outputColumnNames: _col0, _col1, _col2
-  Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE Column 
stats: NONE
+  Statistics: Num rows: 14 Data size: 312 Basic stats: COMPLETE Column 
stats: NONE
   Select Operator
 expressions: _col1 (type: int), _col0 (type: string), _col2 (type: 
bigint)
 outputColumnNames: _col0, _col1, _col2
-Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE 
Column stats: NONE
+Statistics: Num rows: 14 Data size: 312 Basic stats: COMPLETE 
Column stats: NONE
 File Output Operator
   compressed: false
   table:

http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/correlationoptimizer4.q.out
--
diff --git 

[69/91] [abbrv] hive git commit: HIVE-12329: Turn on limit pushdown optimization by default (Ashutosh Chauhan via Prasanth J)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/smb_mapjoin_15.q.out
--
diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_15.q.out 
b/ql/src/test/results/clientpositive/smb_mapjoin_15.q.out
index 64a3ea2..8608187 100644
--- a/ql/src/test/results/clientpositive/smb_mapjoin_15.q.out
+++ b/ql/src/test/results/clientpositive/smb_mapjoin_15.q.out
@@ -124,6 +124,8 @@ STAGE PLANS:
 key expressions: _col0 (type: int)
 sort order: +
 tag: -1
+TopN: 10
+TopN Hash Memory Usage: 0.1
 value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: string)
 auto parallelism: false
   Path -> Alias:
@@ -382,6 +384,8 @@ STAGE PLANS:
 key expressions: _col0 (type: int)
 sort order: +
 tag: -1
+TopN: 10
+TopN Hash Memory Usage: 0.1
 value expressions: _col1 (type: int), _col2 (type: 
string), _col3 (type: int), _col4 (type: int), _col5 (type: string)
 auto parallelism: false
   Path -> Alias:
@@ -588,6 +592,8 @@ STAGE PLANS:
 key expressions: _col0 (type: int)
 sort order: +
 tag: -1
+TopN: 10
+TopN Hash Memory Usage: 0.1
 value expressions: _col1 (type: int), _col2 (type: 
string), _col3 (type: int), _col4 (type: int), _col5 (type: string)
 auto parallelism: false
   Path -> Alias:
@@ -827,6 +833,8 @@ STAGE PLANS:
 sort order: +
 Statistics: Num rows: 137 Data size: 1984 Basic stats: 
COMPLETE Column stats: NONE
 tag: -1
+TopN: 10
+TopN Hash Memory Usage: 0.1
 value expressions: _col1 (type: int), _col2 (type: 
string), _col3 (type: int), _col4 (type: int), _col5 (type: string)
 auto parallelism: false
   Local Work:

http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out 
b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out
index a234ff5..932fdcc 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out
@@ -65,6 +65,7 @@ STAGE PLANS:
   key expressions: _col0 (type: string), _col1 (type: string)
   sort order: ++
   Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
+  TopN Hash Memory Usage: 0.1
 Reducer 3 
 Reduce Operator Tree:
   Select Operator
@@ -238,6 +239,7 @@ STAGE PLANS:
   key expressions: _col0 (type: string), _col1 (type: string)
   sort order: ++
   Statistics: Num rows: 302 Data size: 3213 Basic stats: 
COMPLETE Column stats: NONE
+  TopN Hash Memory Usage: 0.1
 Reducer 4 
 Reduce Operator Tree:
   Select Operator
@@ -411,6 +413,7 @@ STAGE PLANS:
   key expressions: _col0 (type: string), _col1 (type: string)
   sort order: ++
   Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
+  TopN Hash Memory Usage: 0.1
 Reducer 4 
 Reduce Operator Tree:
   Select Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out 
b/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out
index a99cb74..84f68a3 100644
--- a/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out
+++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out
@@ -262,6 +262,8 @@ STAGE PLANS:
   sort order: ++
   Statistics: Num rows: 378 Data size: 1514 Basic 
stats: COMPLETE Column stats: NONE
   tag: -1
+  TopN: 1
+  TopN Hash Memory Usage: 0.1
   auto parallelism: false
 Local Work:
   Map Reduce Local Work

http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/spark/ctas.q.out

[59/91] [abbrv] hive git commit: HIVE-12411: Remove counter based stats collection mechanism (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/ab98ffc2/ql/src/test/results/clientpositive/stats_counter_partitioned.q.out
--
diff --git a/ql/src/test/results/clientpositive/stats_counter_partitioned.q.out 
b/ql/src/test/results/clientpositive/stats_counter_partitioned.q.out
deleted file mode 100644
index 626dcff..000
--- a/ql/src/test/results/clientpositive/stats_counter_partitioned.q.out
+++ /dev/null
@@ -1,465 +0,0 @@
-PREHOOK: query: -- partitioned table analyze 
-
-create table dummy (key string, value string) partitioned by (ds string, hr 
string)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@dummy
-POSTHOOK: query: -- partitioned table analyze 
-
-create table dummy (key string, value string) partitioned by (ds string, hr 
string)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@dummy
-PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table 
dummy partition (ds='2008',hr='12')
-PREHOOK: type: LOAD
- A masked pattern was here 
-PREHOOK: Output: default@dummy
-POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table 
dummy partition (ds='2008',hr='12')
-POSTHOOK: type: LOAD
- A masked pattern was here 
-POSTHOOK: Output: default@dummy
-POSTHOOK: Output: default@dummy@ds=2008/hr=12
-PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table 
dummy partition (ds='2008',hr='11')
-PREHOOK: type: LOAD
- A masked pattern was here 
-PREHOOK: Output: default@dummy
-POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table 
dummy partition (ds='2008',hr='11')
-POSTHOOK: type: LOAD
- A masked pattern was here 
-POSTHOOK: Output: default@dummy
-POSTHOOK: Output: default@dummy@ds=2008/hr=11
-PREHOOK: query: analyze table dummy partition (ds,hr) compute statistics
-PREHOOK: type: QUERY
-PREHOOK: Input: default@dummy
-PREHOOK: Input: default@dummy@ds=2008/hr=11
-PREHOOK: Input: default@dummy@ds=2008/hr=12
-PREHOOK: Output: default@dummy
-PREHOOK: Output: default@dummy@ds=2008/hr=11
-PREHOOK: Output: default@dummy@ds=2008/hr=12
-POSTHOOK: query: analyze table dummy partition (ds,hr) compute statistics
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@dummy
-POSTHOOK: Input: default@dummy@ds=2008/hr=11
-POSTHOOK: Input: default@dummy@ds=2008/hr=12
-POSTHOOK: Output: default@dummy
-POSTHOOK: Output: default@dummy@ds=2008/hr=11
-POSTHOOK: Output: default@dummy@ds=2008/hr=12
-PREHOOK: query: describe formatted dummy partition (ds='2008', hr='11')
-PREHOOK: type: DESCTABLE
-PREHOOK: Input: default@dummy
-POSTHOOK: query: describe formatted dummy partition (ds='2008', hr='11')
-POSTHOOK: type: DESCTABLE
-POSTHOOK: Input: default@dummy
-# col_name data_type   comment 
-
-key                    string
-value                  string
-
-# Partition Information 
-# col_name data_type   comment 
-
-ds string  
-hr string  
-
-# Detailed Partition Information
-Partition Value:   [2008, 11]   
-Database:  default  
-Table: dummy
- A masked pattern was here 
-Partition Parameters:   
-   COLUMN_STATS_ACCURATE   true
-   numFiles1   
-   numRows 500 
-   rawDataSize 5312
-   totalSize   5812
- A masked pattern was here 
-
-# Storage Information   
-SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe  
 
-InputFormat:   org.apache.hadoop.mapred.TextInputFormat 
-OutputFormat:  
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat   
-Compressed:No   
-Num Buckets:   -1   
-Bucket Columns:[]   
-Sort Columns:  []   
-Storage Desc Params:
-   serialization.format1   
-PREHOOK: query: describe formatted dummy partition (ds='2008', hr='12')
-PREHOOK: type: DESCTABLE
-PREHOOK: Input: default@dummy
-POSTHOOK: query: describe formatted dummy partition (ds='2008', hr='12')
-POSTHOOK: type: DESCTABLE
-POSTHOOK: Input: default@dummy
-# col_name data_type   comment 
-
-key                    string
-value                  string

[34/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out 
b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out
index b0258b8..094f646 100644
--- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out
+++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out
@@ -76,61 +76,66 @@ STAGE PLANS:
 Filter Operator
   predicate: p_name is not null (type: boolean)
   Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-key expressions: p_name (type: string)
-sort order: +
-Map-reduce partition columns: p_name (type: string)
+  Select Operator
+expressions: p_partkey (type: int), p_name (type: string), 
p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size 
(type: int), p_container (type: string), p_retailprice (type: double), 
p_comment (type: string)
+outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE 
Column stats: NONE
-value expressions: p_partkey (type: int), p_mfgr (type: 
string), p_brand (type: string), p_type (type: string), p_size (type: int), 
p_container (type: string), p_retailprice (type: double), p_comment (type: 
string)
+Reduce Output Operator
+  key expressions: _col1 (type: string)
+  sort order: +
+  Map-reduce partition columns: _col1 (type: string)
+  Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+  value expressions: _col0 (type: int), _col2 (type: string), 
_col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: 
string), _col7 (type: double), _col8 (type: string)
   TableScan
 alias: p2
 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
 Filter Operator
   predicate: p2_name is not null (type: boolean)
   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-  Reduce Output Operator
-key expressions: p2_name (type: string)
-sort order: +
-Map-reduce partition columns: p2_name (type: string)
+  Select Operator
+expressions: p2_partkey (type: int), p2_name (type: string), 
p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), 
p2_size (type: int), p2_container (type: string), p2_retailprice (type: 
double), p2_comment (type: string)
+outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-value expressions: p2_partkey (type: int), p2_mfgr (type: 
string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), 
p2_container (type: string), p2_retailprice (type: double), p2_comment (type: 
string)
+Reduce Output Operator
+  key expressions: _col1 (type: string)
+  sort order: +
+  Map-reduce partition columns: _col1 (type: string)
+  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
+  value expressions: _col0 (type: int), _col2 (type: string), 
_col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: 
string), _col7 (type: double), _col8 (type: string)
   TableScan
 alias: p3
 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
 Filter Operator
   predicate: p3_name is not null (type: boolean)
   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-  Reduce Output Operator
-key expressions: p3_name (type: string)
-sort order: +
-Map-reduce partition columns: p3_name (type: string)
+  Select Operator
+expressions: p3_partkey (type: int), p3_name (type: string), 
p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), 
p3_size (type: int), p3_container (type: string), p3_retailprice (type: 
double), p3_comment (type: string)
+outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-value expressions: p3_partkey (type: int), p3_mfgr (type: 

[33/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/join_filters_overlap.q.out
--
diff --git a/ql/src/test/results/clientpositive/join_filters_overlap.q.out 
b/ql/src/test/results/clientpositive/join_filters_overlap.q.out
index f52cf26..1d04f37 100644
--- a/ql/src/test/results/clientpositive/join_filters_overlap.q.out
+++ b/ql/src/test/results/clientpositive/join_filters_overlap.q.out
@@ -104,46 +104,58 @@ STAGE PLANS:
 alias: a
 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column 
stats: NONE
 GatherStats: false
-Reduce Output Operator
-  key expressions: key (type: int)
-  sort order: +
-  Map-reduce partition columns: key (type: int)
+Select Operator
+  expressions: key (type: int), value (type: int)
+  outputColumnNames: _col0, _col1
   Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE 
Column stats: NONE
-  tag: 0
-  value expressions: value (type: int)
-  auto parallelism: false
+  Reduce Output Operator
+key expressions: _col0 (type: int)
+sort order: +
+Map-reduce partition columns: _col0 (type: int)
+Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE 
Column stats: NONE
+tag: 0
+value expressions: _col1 (type: int)
+auto parallelism: false
   TableScan
-alias: b
+alias: a
 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column 
stats: NONE
 GatherStats: false
 Filter Operator
   isSamplingPred: false
   predicate: (value = 50) (type: boolean)
   Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-key expressions: key (type: int)
-sort order: +
-Map-reduce partition columns: key (type: int)
+  Select Operator
+expressions: key (type: int), 50 (type: int)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE 
Column stats: NONE
-tag: 1
-value expressions: value (type: int)
-auto parallelism: false
+Reduce Output Operator
+  key expressions: _col0 (type: int)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: int)
+  Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE 
Column stats: NONE
+  tag: 1
+  value expressions: _col1 (type: int)
+  auto parallelism: false
   TableScan
-alias: c
+alias: a
 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column 
stats: NONE
 GatherStats: false
 Filter Operator
   isSamplingPred: false
   predicate: (value = 60) (type: boolean)
   Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE 
Column stats: NONE
-  Reduce Output Operator
-key expressions: key (type: int)
-sort order: +
-Map-reduce partition columns: key (type: int)
+  Select Operator
+expressions: key (type: int), 60 (type: int)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE 
Column stats: NONE
-tag: 2
-value expressions: value (type: int)
-auto parallelism: false
+Reduce Output Operator
+  key expressions: _col0 (type: int)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: int)
+  Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE 
Column stats: NONE
+  tag: 2
+  value expressions: _col1 (type: int)
+  auto parallelism: false
   Path -> Alias:
  A masked pattern was here 
   Path -> Partition:
@@ -192,7 +204,7 @@ STAGE PLANS:
   name: default.a
 name: default.a
   Truncated Path -> Alias:
-/a [a, b, c]
+/a [$hdt$_0:a, $hdt$_1:a, $hdt$_2:a]
   Needs Tagging: true
   Reduce Operator Tree:
 Join Operator
@@ -206,37 +218,33 @@ STAGE PLANS:
 1 
 2 
   keys:
-0 key (type: int)
-1 key (type: int)
-2 key (type: int)
-  outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
+0 _col0 (type: int)
+1 _col0 (type: int)
+ 

[09/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out 
b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out
index fed923c..4133fda 100644
--- a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out
+++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out
@@ -173,11 +173,15 @@ STAGE PLANS:
 isSamplingPred: false
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 15 Data size: 1583 Basic stats: 
COMPLETE Column stats: NONE
-Spark HashTable Sink Operator
-  keys:
-0 key (type: int)
-1 key (type: int)
-  Position of Big Table: 1
+Select Operator
+  expressions: key (type: int), value (type: string)
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 15 Data size: 1583 Basic stats: 
COMPLETE Column stats: NONE
+  Spark HashTable Sink Operator
+keys:
+  0 _col0 (type: int)
+  1 _col0 (type: int)
+Position of Big Table: 1
 Local Work:
   Map Reduce Local Work
 Bucket Mapjoin Context:
@@ -252,48 +256,52 @@ STAGE PLANS:
 isSamplingPred: false
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 28 Data size: 2958 Basic stats: 
COMPLETE Column stats: NONE
-Map Join Operator
-  condition map:
-   Inner Join 0 to 1
-  keys:
-0 key (type: int)
-1 key (type: int)
-  outputColumnNames: _col0, _col1, _col7
-  input vertices:
-0 Map 1
-  Position of Big Table: 1
-  Statistics: Num rows: 30 Data size: 3253 Basic stats: 
COMPLETE Column stats: NONE
-  BucketMapJoin: true
-  Select Operator
-expressions: _col0 (type: int), _col1 (type: string), 
_col7 (type: string)
-outputColumnNames: _col0, _col1, _col2
+Select Operator
+  expressions: key (type: int), value (type: string)
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 28 Data size: 2958 Basic stats: 
COMPLETE Column stats: NONE
+  Map Join Operator
+condition map:
+ Inner Join 0 to 1
+keys:
+  0 _col0 (type: int)
+  1 _col0 (type: int)
+outputColumnNames: _col0, _col1, _col3
+input vertices:
+  0 Map 1
+Position of Big Table: 1
 Statistics: Num rows: 30 Data size: 3253 Basic stats: 
COMPLETE Column stats: NONE
-File Output Operator
-  compressed: false
-  GlobalTableId: 1
- A masked pattern was here 
-  NumFilesPerFileSink: 1
+BucketMapJoin: true
+Select Operator
+  expressions: _col0 (type: int), _col1 (type: 
string), _col3 (type: string)
+  outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 30 Data size: 3253 Basic 
stats: COMPLETE Column stats: NONE
- A masked pattern was here 
-  table:
-  input format: 
org.apache.hadoop.mapred.TextInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-  properties:
-bucket_count -1
-columns key,value1,value2
-columns.comments 
-columns.types string:string:string
- A masked pattern was here 
-name default.bucketmapjoin_tmp_result
-serialization.ddl struct 
bucketmapjoin_tmp_result { string key, string value1, string value2}
-serialization.format 1
-serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- A masked pattern was here 
- 

[04/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out 
b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out
index 5a77830..2eb0c3b 100644
--- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out
@@ -79,12 +79,16 @@ STAGE PLANS:
   Filter Operator
 predicate: p_name is not null (type: boolean)
 Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: p_name (type: string)
-  sort order: +
-  Map-reduce partition columns: p_name (type: string)
+Select Operator
+  expressions: p_partkey (type: int), p_name (type: 
string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), 
p_size (type: int), p_container (type: string), p_retailprice (type: double), 
p_comment (type: string)
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
   Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
-  value expressions: p_partkey (type: int), p_mfgr (type: 
string), p_brand (type: string), p_type (type: string), p_size (type: int), 
p_container (type: string), p_retailprice (type: double), p_comment (type: 
string)
+  Reduce Output Operator
+key expressions: _col1 (type: string)
+sort order: +
+Map-reduce partition columns: _col1 (type: string)
+Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+value expressions: _col0 (type: int), _col2 (type: 
string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 
(type: string), _col7 (type: double), _col8 (type: string)
 Map 3 
 Map Operator Tree:
 TableScan
@@ -93,12 +97,16 @@ STAGE PLANS:
   Filter Operator
 predicate: p2_name is not null (type: boolean)
 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-Reduce Output Operator
-  key expressions: p2_name (type: string)
-  sort order: +
-  Map-reduce partition columns: p2_name (type: string)
+Select Operator
+  expressions: p2_partkey (type: int), p2_name (type: 
string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: 
string), p2_size (type: int), p2_container (type: string), p2_retailprice 
(type: double), p2_comment (type: string)
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
   Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE
-  value expressions: p2_partkey (type: int), p2_mfgr 
(type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: 
int), p2_container (type: string), p2_retailprice (type: double), p2_comment 
(type: string)
+  Reduce Output Operator
+key expressions: _col1 (type: string)
+sort order: +
+Map-reduce partition columns: _col1 (type: string)
+Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE
+value expressions: _col0 (type: int), _col2 (type: 
string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 
(type: string), _col7 (type: double), _col8 (type: string)
 Map 4 
 Map Operator Tree:
 TableScan
@@ -107,12 +115,16 @@ STAGE PLANS:
   Filter Operator
 predicate: p3_name is not null (type: boolean)
 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-Reduce Output Operator
-  key expressions: p3_name (type: string)
-  sort order: +
-  Map-reduce partition columns: p3_name (type: string)
+Select Operator
+  expressions: p3_partkey (type: int), p3_name (type: 
string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: 
string), p3_size (type: int), p3_container (type: string), p3_retailprice 
(type: double), p3_comment (type: string)
+  

[18/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/skewjoin.q.out
--
diff --git a/ql/src/test/results/clientpositive/skewjoin.q.out 
b/ql/src/test/results/clientpositive/skewjoin.q.out
index 22a9421..13c4470 100644
--- a/ql/src/test/results/clientpositive/skewjoin.q.out
+++ b/ql/src/test/results/clientpositive/skewjoin.q.out
@@ -101,15 +101,14 @@ STAGE PLANS:
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
-expressions: key (type: string), value (type: string)
-outputColumnNames: _col0, _col1
+expressions: key (type: string)
+outputColumnNames: _col0
 Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: string)
   sort order: +
   Map-reduce partition columns: _col0 (type: string)
   Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
-  value expressions: _col1 (type: string)
   TableScan
 alias: src1
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
@@ -117,14 +116,15 @@ STAGE PLANS:
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
-expressions: key (type: string)
-outputColumnNames: _col0
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: string)
   sort order: +
   Map-reduce partition columns: _col0 (type: string)
   Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+  value expressions: _col1 (type: string)
   Reduce Operator Tree:
 Join Operator
   condition map:
@@ -133,10 +133,10 @@ STAGE PLANS:
   keys:
 0 _col0 (type: string)
 1 _col0 (type: string)
-  outputColumnNames: _col1, _col2
+  outputColumnNames: _col0, _col2
   Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
-expressions: UDFToInteger(_col2) (type: int), _col1 (type: string)
+expressions: UDFToInteger(_col0) (type: int), _col2 (type: string)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE 
Column stats: NONE
 File Output Operator
@@ -175,9 +175,9 @@ STAGE PLANS:
   keys:
 0 reducesinkkey0 (type: string)
 1 reducesinkkey0 (type: string)
-  outputColumnNames: _col1, _col2
+  outputColumnNames: _col0, _col2
   Select Operator
-expressions: UDFToInteger(_col2) (type: int), _col1 (type: 
string)
+expressions: UDFToInteger(_col0) (type: int), _col2 (type: 
string)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
 File Output Operator
@@ -595,15 +595,14 @@ STAGE PLANS:
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
-expressions: key (type: string), value (type: string)
-outputColumnNames: _col0, _col1
+expressions: key (type: string)
+outputColumnNames: _col0
 Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: string)
   sort order: +
   Map-reduce partition columns: _col0 (type: string)
   Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
-  value expressions: _col1 (type: string)
   TableScan
 alias: src
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
@@ -611,14 +610,15 @@ STAGE PLANS:
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
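
Read together, these skewjoin.q.out hunks show column pruning moving into the Select Operators on each side of the join: the side that only supplies the join key stops shipping value, the side that supplies the value ships it, and the join's output column names shift from (_col1, _col2) to (_col0, _col2) accordingly. A sketch of the driving query, reconstructed from the plan (the UDFToInteger cast implies an int key in the destination table; treat the fixture names as assumptions):

    set hive.optimize.skewjoin=true;
    set hive.skewjoin.key=2;
    -- dest_j1(key int, value string) is assumed from the plan's cast.
    from src src1 join src src2 on (src1.key = src2.key)
    insert overwrite table dest_j1 select src1.key, src2.value;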
 

[02/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out 
b/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out
index 0f9d1ae..7ac16d0 100644
--- a/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out
@@ -109,14 +109,18 @@ STAGE PLANS:
   alias: a
   Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE 
Column stats: NONE
   GatherStats: false
-  Reduce Output Operator
-key expressions: key (type: int)
-sort order: +
-Map-reduce partition columns: key (type: int)
+  Select Operator
+expressions: key (type: int), value (type: int)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 3 Data size: 18 Basic stats: 
COMPLETE Column stats: NONE
-tag: 0
-value expressions: value (type: int)
-auto parallelism: false
+Reduce Output Operator
+  key expressions: _col0 (type: int)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: int)
+  Statistics: Num rows: 3 Data size: 18 Basic stats: 
COMPLETE Column stats: NONE
+  tag: 0
+  value expressions: _col1 (type: int)
+  auto parallelism: false
 Path -> Alias:
#### A masked pattern was here ####
 Path -> Partition:
@@ -169,21 +173,25 @@ STAGE PLANS:
 Map 3 
 Map Operator Tree:
 TableScan
-  alias: b
+  alias: a
   Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE 
Column stats: NONE
   GatherStats: false
   Filter Operator
 isSamplingPred: false
 predicate: (value = 50) (type: boolean)
 Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE 
Column stats: NONE
-Reduce Output Operator
-  key expressions: key (type: int)
-  sort order: +
-  Map-reduce partition columns: key (type: int)
+Select Operator
+  expressions: key (type: int), 50 (type: int)
+  outputColumnNames: _col0, _col1
   Statistics: Num rows: 1 Data size: 6 Basic stats: 
COMPLETE Column stats: NONE
-  tag: 1
-  value expressions: value (type: int)
-  auto parallelism: false
+  Reduce Output Operator
+key expressions: _col0 (type: int)
+sort order: +
+Map-reduce partition columns: _col0 (type: int)
+Statistics: Num rows: 1 Data size: 6 Basic stats: 
COMPLETE Column stats: NONE
+tag: 1
+value expressions: _col1 (type: int)
+auto parallelism: false
 Path -> Alias:
#### A masked pattern was here ####
 Path -> Partition:
@@ -232,25 +240,29 @@ STAGE PLANS:
 name: default.a
   name: default.a
 Truncated Path -> Alias:
-  /a [b]
+  /a [a]
 Map 4 
 Map Operator Tree:
 TableScan
-  alias: c
+  alias: a
   Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE 
Column stats: NONE
   GatherStats: false
   Filter Operator
 isSamplingPred: false
 predicate: (value = 60) (type: boolean)
 Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE 
Column stats: NONE
-Reduce Output Operator
-  key expressions: key (type: int)
-  sort order: +
-  Map-reduce partition columns: key (type: int)
+Select Operator
+  expressions: key (type: int), 60 (type: int)
+  outputColumnNames: _col0, _col1
   Statistics: Num rows: 1 Data size: 6 Basic stats: 
COMPLETE Column stats: NONE
-  tag: 2
-  value expressions: value (type: int)
-  auto parallelism: false
+  Reduce Output Operator
+key expressions: _col0 (type: int)
+
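
The join_filters_overlap hunks add one more wrinkle: because the filter predicates (value = 50) and (value = 60) pin each joined side's value column to a constant, the new Select Operators project the folded literals 50 and 60 directly, and the b and c aliases resolve back to the base table a. A sketch of a query with this shape, assumed from the plan rather than copied from the .q file:

    -- Illustrative: self-joins of a with overlapping, constant filter predicates.
    explain extended
    select * from a
    left outer join a b on (a.key = b.key and b.value = 50)
    left outer join a c on (a.key = c.key and c.value = 60);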

[66/91] [abbrv] hive git commit: HIVE-12331 : Remove hive.enforce.bucketing & hive.enforce.sorting configs (Ashutosh Chauhan via Jason Dere)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_all_non_partitioned.q
--
diff --git a/ql/src/test/queries/clientpositive/delete_all_non_partitioned.q 
b/ql/src/test/queries/clientpositive/delete_all_non_partitioned.q
index 9110dcc..82c18e2 100644
--- a/ql/src/test/queries/clientpositive/delete_all_non_partitioned.q
+++ b/ql/src/test/queries/clientpositive/delete_all_non_partitioned.q
@@ -1,6 +1,6 @@
 set hive.support.concurrency=true;
 set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-set hive.enforce.bucketing=true;
+
 
 create table acid_danp(a int, b varchar(128)) clustered by (a) into 2 buckets 
stored as orc TBLPROPERTIES ('transactional'='true');
 

http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_all_partitioned.q
--
diff --git a/ql/src/test/queries/clientpositive/delete_all_partitioned.q 
b/ql/src/test/queries/clientpositive/delete_all_partitioned.q
index f082b6d..122b3e2 100644
--- a/ql/src/test/queries/clientpositive/delete_all_partitioned.q
+++ b/ql/src/test/queries/clientpositive/delete_all_partitioned.q
@@ -1,6 +1,6 @@
 set hive.support.concurrency=true;
 set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-set hive.enforce.bucketing=true;
+
 
 create table acid_dap(a int, b varchar(128)) partitioned by (ds string) 
clustered by (a) into 2 buckets stored as orc TBLPROPERTIES 
('transactional'='true');
 

http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_orig_table.q
--
diff --git a/ql/src/test/queries/clientpositive/delete_orig_table.q 
b/ql/src/test/queries/clientpositive/delete_orig_table.q
index fd23f4b..88cc830 100644
--- a/ql/src/test/queries/clientpositive/delete_orig_table.q
+++ b/ql/src/test/queries/clientpositive/delete_orig_table.q
@@ -1,6 +1,6 @@
 set hive.support.concurrency=true;
 set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-set hive.enforce.bucketing=true;
+
 
 dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/delete_orig_table;
 dfs -copyFromLocal ../../data/files/alltypesorc 
${system:test.tmp.dir}/delete_orig_table/0_0; 

http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_tmp_table.q
--
diff --git a/ql/src/test/queries/clientpositive/delete_tmp_table.q 
b/ql/src/test/queries/clientpositive/delete_tmp_table.q
index eb6c095..c7d8aa6 100644
--- a/ql/src/test/queries/clientpositive/delete_tmp_table.q
+++ b/ql/src/test/queries/clientpositive/delete_tmp_table.q
@@ -1,6 +1,6 @@
 set hive.support.concurrency=true;
 set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-set hive.enforce.bucketing=true;
+
 
 create temporary table acid_dtt(a int, b varchar(128)) clustered by (a) into 2 
buckets stored as orc TBLPROPERTIES ('transactional'='true');
 

http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_where_no_match.q
--
diff --git a/ql/src/test/queries/clientpositive/delete_where_no_match.q 
b/ql/src/test/queries/clientpositive/delete_where_no_match.q
index 8ed979d..f13dd73 100644
--- a/ql/src/test/queries/clientpositive/delete_where_no_match.q
+++ b/ql/src/test/queries/clientpositive/delete_where_no_match.q
@@ -1,6 +1,6 @@
 set hive.support.concurrency=true;
 set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-set hive.enforce.bucketing=true;
+
 
 create table acid_dwnm(a int, b varchar(128)) clustered by (a) into 2 buckets 
stored as orc TBLPROPERTIES ('transactional'='true');
 

http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_where_non_partitioned.q
--
diff --git a/ql/src/test/queries/clientpositive/delete_where_non_partitioned.q 
b/ql/src/test/queries/clientpositive/delete_where_non_partitioned.q
index dac5375..de1ca36 100644
--- a/ql/src/test/queries/clientpositive/delete_where_non_partitioned.q
+++ b/ql/src/test/queries/clientpositive/delete_where_non_partitioned.q
@@ -1,6 +1,6 @@
 set hive.support.concurrency=true;
 set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-set hive.enforce.bucketing=true;
+
 
 create table acid_dwnp(a int, b varchar(128)) clustered by (a) into 2 buckets 
stored as orc TBLPROPERTIES ('transactional'='true');
 

http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_where_partitioned.q
--
diff --git 
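
Every hunk in this message makes the same mechanical edit: HIVE-12331 removed hive.enforce.bucketing and hive.enforce.sorting, bucketing and sorting are now always enforced for bucketed tables, and the ACID tests simply drop the obsolete set line. A minimal sketch of the pattern after the change, using a hypothetical table name:

    set hive.support.concurrency=true;
    set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
    -- No hive.enforce.bucketing needed any more; acid_demo is a hypothetical table.
    create table acid_demo (a int, b varchar(128))
      clustered by (a) into 2 buckets
      stored as orc TBLPROPERTIES ('transactional'='true');
    delete from acid_demo where a = 1;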

[85/91] [abbrv] hive git commit: HIVE-12008: Hive queries failing when using count(*) on column in view (Yongzhi Chen, reviewed by Szehon Ho)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/a51e5d4e/ql/src/test/results/clientpositive/union9.q.out
--
diff --git a/ql/src/test/results/clientpositive/union9.q.out 
b/ql/src/test/results/clientpositive/union9.q.out
index ec7ab35..5f54210 100644
--- a/ql/src/test/results/clientpositive/union9.q.out
+++ b/ql/src/test/results/clientpositive/union9.q.out
@@ -24,73 +24,67 @@ STAGE PLANS:
   Map Operator Tree:
   TableScan
 alias: s1
-Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: COMPLETE
 Select Operator
-  expressions: value (type: string)
-  outputColumnNames: _col0
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+  Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
   Union
-Statistics: Num rows: 1500 Data size: 15936 Basic stats: 
COMPLETE Column stats: NONE
+Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
 Select Operator
-  Statistics: Num rows: 1500 Data size: 15936 Basic stats: 
COMPLETE Column stats: NONE
+  Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
   Group By Operator
 aggregations: count(1)
 mode: hash
 outputColumnNames: _col0
-Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
 Reduce Output Operator
   sort order: 
-  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
   value expressions: _col0 (type: bigint)
   TableScan
 alias: s1
-Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: COMPLETE
 Select Operator
-  expressions: value (type: string)
-  outputColumnNames: _col0
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+  Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
   Union
-Statistics: Num rows: 1500 Data size: 15936 Basic stats: 
COMPLETE Column stats: NONE
+Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
 Select Operator
-  Statistics: Num rows: 1500 Data size: 15936 Basic stats: 
COMPLETE Column stats: NONE
+  Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
   Group By Operator
 aggregations: count(1)
 mode: hash
 outputColumnNames: _col0
-Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
 Reduce Output Operator
   sort order: 
-  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
   value expressions: _col0 (type: bigint)
   TableScan
 alias: s1
-Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: COMPLETE
 Select Operator
-  expressions: value (type: string)
-  outputColumnNames: _col0
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+  Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
   Union
-Statistics: Num rows: 1500 Data size: 15936 Basic stats: 
COMPLETE Column stats: NONE
+Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
 Select Operator
-  Statistics: Num rows: 1500 Data size: 15936 Basic stats: 
COMPLETE Column stats: NONE
+  Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
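
The union9.q.out hunks all record the same statistics shift: once the count references no real columns, the per-branch Select Operators project nothing, Data size drops to 0, basic stats degrade to PARTIAL, and column stats are reported as COMPLETE. The query behind this golden file is the classic three-branch union count, sketched here from the plan (the branch aliases and select lists are assumptions; the actual test may project key/value pairs):

    explain
    select count(1) from (
      select * from src s1
      union all
      select * from src s2
      union all
      select * from src s3
    ) unionsrc;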
   

[77/91] [abbrv] hive git commit: HIVE-12473 : DPP: UDFs on the partition column side does not evaluate correctly (Sergey Shelukhin, reviewed by Gopal V)

2015-11-29 Thread xuefu
HIVE-12473 : DPP: UDFs on the partition column side does not evaluate correctly 
(Sergey Shelukhin, reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/454c2cae
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/454c2cae
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/454c2cae

Branch: refs/heads/spark
Commit: 454c2cae4f0a3db1f225da6c283c161fe09bfb87
Parents: 74c0c97
Author: Sergey Shelukhin 
Authored: Wed Nov 25 12:15:29 2015 -0800
Committer: Sergey Shelukhin 
Committed: Wed Nov 25 12:15:29 2015 -0800

--
 .../ql/exec/tez/DynamicPartitionPruner.java | 40 ++--
 1 file changed, 29 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/454c2cae/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicPartitionPruner.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicPartitionPruner.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicPartitionPruner.java
index b67ac8d..60b71aa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicPartitionPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicPartitionPruner.java
@@ -18,6 +18,12 @@
 
 package org.apache.hadoop.hive.ql.exec.tez;
 
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+
 import java.io.DataInputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -242,25 +248,37 @@ public class DynamicPartitionPruner {
   LOG.debug(sb.toString());
 }
 
-ObjectInspector oi =
-
PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(TypeInfoFactory
-.getPrimitiveTypeInfo(si.fieldInspector.getTypeName()));
+ObjectInspector targetOi = findTargetOi(si.partKey, si.columnName);
+Converter converter = ObjectInspectorConverters.getConverter(
+PrimitiveObjectInspectorFactory.javaStringObjectInspector, 
targetOi);
 
-Converter converter =
-ObjectInspectorConverters.getConverter(
-PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi);
-
-StructObjectInspector soi =
-ObjectInspectorFactory.getStandardStructObjectInspector(
-Collections.singletonList(columnName), 
Collections.singletonList(oi));
+StructObjectInspector soi = 
ObjectInspectorFactory.getStandardStructObjectInspector(
+Collections.singletonList(columnName), 
Collections.singletonList(targetOi));
 
 @SuppressWarnings("rawtypes")
 ExprNodeEvaluator eval = ExprNodeEvaluatorFactory.get(si.partKey);
-eval.initialize(soi);
+eval.initialize(soi); // We expect the row with just the relevant column.
 
 applyFilterToPartitions(converter, eval, columnName, values);
   }
 
+  private ObjectInspector findTargetOi(ExprNodeDesc expr, String columnName) {
+if (expr instanceof ExprNodeColumnDesc) {
+  ExprNodeColumnDesc colExpr = (ExprNodeColumnDesc)expr;
+  // TODO: this is not necessarily going to work for all cases. At least, 
table name is needed.
+  //   Also it's not clear if this is going to work with subquery 
columns and such.
+  if (columnName.equals(colExpr.getColumn())) {
+return 
PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+(PrimitiveTypeInfo)colExpr.getTypeInfo());
+  }
+}
+for (ExprNodeDesc child : expr.getChildren()) {
+  ObjectInspector oi = findTargetOi(child, columnName);
+  if (oi != null) return oi;
+}
+return null;
+  }
+
   @SuppressWarnings("rawtypes")
   private void applyFilterToPartitions(Converter converter, ExprNodeEvaluator 
eval,
  String columnName, Set<Object> values) throws HiveException {
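
The substance of the fix is in findTargetOi(): the converter feeding applyFilterToPartitions() must target the primitive type of the partition column itself, not the type of whatever expression wraps it, so the pruner now searches the partition-key expression tree recursively for the matching ExprNodeColumnDesc and builds the ObjectInspector from that column's type info. A sketch of the kind of query that exercises this path, with hypothetical table and column names:

    set hive.tez.dynamic.partition.pruning=true;
    -- Hypothetical: fact is partitioned by ds (string); dim.yr is an int.
    -- The UDF year() wraps the partition column on the pruned side.
    select count(*)
    from fact f
    join dim d on year(f.ds) = d.yr;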



[91/91] [abbrv] hive git commit: Merge branch 'master' into spark

2015-11-29 Thread xuefu
Merge branch 'master' into spark


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/79035f1c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/79035f1c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/79035f1c

Branch: refs/heads/spark
Commit: 79035f1c520bbc9a900723b5b9c065b67fde636d
Parents: eddb8ca 0a96201
Author: Xuefu Zhang <xzh...@cloudera.com>
Authored: Sun Nov 29 16:05:29 2015 -0800
Committer: Xuefu Zhang <xzh...@cloudera.com>
Committed: Sun Nov 29 16:05:29 2015 -0800

--
 accumulo-handler/pom.xml|8 +-
 ant/pom.xml |6 +
 .../java/org/apache/hive/beeline/BeeLine.java   |   13 +-
 .../apache/hive/beeline/cli/TestHiveCli.java|1 +
 beeline/src/test/resources/hive-site.xml|5 +
 bin/ext/version.sh  |2 +-
 bin/hive|   23 +-
 .../org/apache/hadoop/hive/cli/CliDriver.java   |   11 +
 common/pom.xml  |   28 +-
 .../hadoop/hive/common/StatsSetupConst.java |   13 -
 .../hive/common/metrics/LegacyMetrics.java  |   27 +-
 .../hive/common/metrics/common/Metrics.java |   28 +-
 .../common/metrics/common/MetricsConstant.java  |6 +
 .../common/metrics/common/MetricsScope.java |   33 +
 .../metrics/metrics2/CodahaleMetrics.java   |   41 +-
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   96 +-
 .../apache/hadoop/hive/ql/log/PerfLogger.java   |   27 +
 .../apache/hive/common/util/BloomFilter.java|   18 +-
 .../org/apache/hive/common/util/Murmur3.java|  107 +-
 .../hive/http/AdminAuthorizedServlet.java   |   45 +
 .../java/org/apache/hive/http/ConfServlet.java  |  101 +
 .../java/org/apache/hive/http/HttpServer.java   |  316 ++
 .../org/apache/hive/http/JMXJsonServlet.java|  412 +++
 .../hive/common/metrics/MetricsTestUtils.java   |   61 +
 .../hive/common/metrics/TestLegacyMetrics.java  |   46 +-
 .../metrics/metrics2/TestCodahaleMetrics.java   |   14 +-
 .../apache/hive/common/util/TestMurmur3.java|   45 +-
 data/conf/hive-site.xml |5 +
 data/conf/llap/hive-site.xml|7 +-
 data/conf/spark/standalone/hive-site.xml|7 +-
 data/conf/spark/yarn-client/hive-site.xml   |6 +-
 data/conf/tez/hive-site.xml |9 +-
 .../hive/hbase/HiveHBaseTableInputFormat.java   |   10 +
 .../test/results/positive/hbase_queries.q.out   |   13 +-
 .../src/test/templates/TestHBaseCliDriver.vm|   63 +-
 .../templates/TestHBaseNegativeCliDriver.vm |   64 +-
 .../mapreduce/FosterStorageHandler.java |   37 +
 .../hive/hcatalog/mapreduce/InputJobInfo.java   |8 +-
 .../rcfile/RCFileMapReduceInputFormat.java  |8 +-
 .../rcfile/TestRCFileMapReduceInputFormat.java  |4 +-
 .../streaming/AbstractRecordWriter.java |   51 +-
 .../hcatalog/streaming/ConnectionError.java |3 +-
 .../streaming/DelimitedInputWriter.java |5 +-
 .../hive/hcatalog/streaming/HiveEndPoint.java   |  211 +-
 .../hcatalog/streaming/StrictJsonWriter.java|   11 +-
 .../hcatalog/streaming/TransactionBatch.java|1 +
 .../hcatalog/streaming/TransactionError.java|2 +-
 .../hive/hcatalog/streaming/TestStreaming.java  |  175 +-
 .../streaming/mutate/StreamingAssert.java   |2 +
 hplsql/pom.xml  |5 -
 itests/hive-unit/pom.xml|7 +
 ...estDDLWithRemoteMetastoreSecondNamenode.java |3 +-
 .../hive/ql/txn/compactor/TestCompactor.java|  246 +-
 .../org/apache/hive/jdbc/TestJdbcDriver2.java   |   89 +-
 .../hive/jdbc/miniHS2/TestHs2Metrics.java   |  116 +
 itests/qtest-accumulo/pom.xml   |2 +-
 .../test/resources/testconfiguration.properties |   23 +-
 .../hadoop/hive/hbase/HBaseTestSetup.java   |9 +-
 .../org/apache/hadoop/hive/ql/QTestUtil.java|   11 +-
 jdbc/pom.xml|   52 +
 .../hive/llap/io/api/impl/LlapInputFormat.java  |   40 +-
 .../hadoop/hive/metastore/HiveMetaStore.java|9 +-
 .../hive/metastore/MetaStoreDirectSql.java  |   12 +
 .../metastore/txn/CompactionTxnHandler.java |  170 +-
 .../hadoop/hive/metastore/txn/TxnHandler.java   |  533 +++-
 .../metastore/txn/TestCompactionTxnHandler.java |   37 -
 .../hive/metastore/txn/TestTxnHandler.java  |   10 +-
 packaging/pom.xml   |   15 +-
 pom.xml |   15 +-
 ql/pom.xml  |   42 +-
 .../java/org/apache/hadoop/hive/ql/Driver.java  |   30 +-
 .../org/apache/hadoop/hive/ql/ErrorMsg.java |8 +-
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java |5 +-
 .../hadoop/hive/ql/exec/FetchOperator.java  |   10 +-
 .../a

[11/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out 
b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out
index 23530bd..ff57c08 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out
@@ -142,26 +142,30 @@ STAGE PLANS:
 isSamplingPred: false
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 29 Data size: 2906 Basic stats: 
COMPLETE Column stats: NONE
-Sorted Merge Bucket Map Join Operator
-  condition map:
-   Inner Join 0 to 1
-  keys:
-0 key (type: string)
-1 key (type: string)
-  Position of Big Table: 1
-  Statistics: Num rows: 31 Data size: 3196 Basic stats: 
COMPLETE Column stats: NONE
-  BucketMapJoin: true
-  Group By Operator
-aggregations: count()
-mode: hash
-outputColumnNames: _col0
-Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  sort order: 
+Select Operator
+  expressions: key (type: string)
+  outputColumnNames: _col0
+  Statistics: Num rows: 29 Data size: 2906 Basic stats: 
COMPLETE Column stats: NONE
+  Sorted Merge Bucket Map Join Operator
+condition map:
+ Inner Join 0 to 1
+keys:
+  0 _col0 (type: string)
+  1 _col0 (type: string)
+Position of Big Table: 1
+Statistics: Num rows: 31 Data size: 3196 Basic stats: 
COMPLETE Column stats: NONE
+BucketMapJoin: true
+Group By Operator
+  aggregations: count()
+  mode: hash
+  outputColumnNames: _col0
   Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-  tag: -1
-  value expressions: _col0 (type: bigint)
-  auto parallelism: false
+  Reduce Output Operator
+sort order: 
+Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+tag: -1
+value expressions: _col0 (type: bigint)
+auto parallelism: false
 Path -> Alias:
#### A masked pattern was here ####
 Path -> Partition:
@@ -214,7 +218,7 @@ STAGE PLANS:
 name: default.bucket_big
   name: default.bucket_big
 Truncated Path -> Alias:
-  /bucket_big/ds=2008-04-08 [b]
+  /bucket_big/ds=2008-04-08 [$hdt$_1:b]
 Reducer 2 
 Needs Tagging: false
 Reduce Operator Tree:
@@ -326,26 +330,30 @@ STAGE PLANS:
 isSamplingPred: false
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 29 Data size: 2906 Basic stats: 
COMPLETE Column stats: NONE
-Sorted Merge Bucket Map Join Operator
-  condition map:
-   Inner Join 0 to 1
-  keys:
-0 key (type: string)
-1 key (type: string)
-  Position of Big Table: 0
-  Statistics: Num rows: 31 Data size: 3196 Basic stats: 
COMPLETE Column stats: NONE
-  BucketMapJoin: true
-  Group By Operator
-aggregations: count()
-mode: hash
-outputColumnNames: _col0
-Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  sort order: 
+Select Operator
+  expressions: key (type: string)
+  outputColumnNames: _col0
+  Statistics: Num rows: 29 Data size: 2906 Basic stats: 
COMPLETE Column stats: NONE
+  Sorted Merge Bucket Map Join Operator
+condition 
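
These auto_sortmerge_join_3 hunks push the same projection under a Sorted Merge Bucket Map Join: both sides now feed the join through a Select Operator, so the join keys become _col0 on each side and the truncated-path alias picks up the $hdt$_1 prefix that CBO assigns. The driving query is the usual bucketed-join count, sketched from the plan (bucket_big is named in the plan itself; bucket_small is the conventional partner table and is an assumption):

    set hive.auto.convert.sortmerge.join=true;
    set hive.optimize.bucketmapjoin=true;
    set hive.optimize.bucketmapjoin.sortedmerge=true;
    -- bucket_small and bucket_big are bucketed, sorted test tables.
    select count(*) from bucket_small a join bucket_big b on a.key = b.key;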

[51/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2015-11-29 Thread xuefu
HIVE-12017: Do not disable CBO by default when number of joins in a query is 
equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7dab21ac
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7dab21ac
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7dab21ac

Branch: refs/heads/spark
Commit: 7dab21acffdd43e80e6fefb5011139bbf30fa541
Parents: 55b589e
Author: Jesus Camacho Rodriguez 
Authored: Sat Nov 14 09:12:49 2015 +0100
Committer: Jesus Camacho Rodriguez 
Committed: Fri Nov 20 23:31:11 2015 +0100

--
 .../test/results/positive/hbase_queries.q.out   |   12 +-
 .../ql/optimizer/calcite/HiveRelOptUtil.java|   40 +
 .../rules/HiveAggregateProjectMergeRule.java|   13 +-
 .../translator/SqlFunctionConverter.java|8 +
 .../hadoop/hive/ql/parse/CalcitePlanner.java|  169 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   16 -
 ql/src/test/queries/clientpositive/mergejoin.q  |9 +
 .../bucket_mapjoin_mismatch1.q.out  |   36 +-
 .../clientnegative/join_nonexistent_part.q.out  |1 -
 .../annotate_stats_join_pkfk.q.out  |  230 +-
 .../archive_excludeHadoop20.q.out   |1 +
 .../results/clientpositive/archive_multi.q.out  |1 +
 .../results/clientpositive/auto_join1.q.out |   12 +-
 .../results/clientpositive/auto_join10.q.out|   12 +-
 .../results/clientpositive/auto_join11.q.out|   12 +-
 .../results/clientpositive/auto_join14.q.out|8 +-
 .../results/clientpositive/auto_join24.q.out|   54 +-
 .../results/clientpositive/auto_join26.q.out|   30 +-
 .../results/clientpositive/auto_join32.q.out|  153 +-
 .../clientpositive/auto_join_filters.q.out  |   12 +-
 .../clientpositive/auto_join_nulls.q.out|2 +-
 .../auto_join_reordering_values.q.out   |  152 +-
 .../clientpositive/auto_join_stats.q.out|  392 ++--
 .../clientpositive/auto_join_stats2.q.out   |  214 +-
 .../clientpositive/auto_smb_mapjoin_14.q.out|  145 +-
 .../clientpositive/auto_sortmerge_join_1.q.out  |  240 +-
 .../clientpositive/auto_sortmerge_join_10.q.out |  103 +-
 .../clientpositive/auto_sortmerge_join_11.q.out |  124 +-
 .../clientpositive/auto_sortmerge_join_12.q.out |  177 +-
 .../clientpositive/auto_sortmerge_join_14.q.out |  152 +-
 .../clientpositive/auto_sortmerge_join_15.q.out |  152 +-
 .../clientpositive/auto_sortmerge_join_2.q.out  |  198 +-
 .../clientpositive/auto_sortmerge_join_3.q.out  |  234 +-
 .../clientpositive/auto_sortmerge_join_4.q.out  |  234 +-
 .../clientpositive/auto_sortmerge_join_5.q.out  |  232 +-
 .../clientpositive/auto_sortmerge_join_6.q.out  | 1140 --
 .../clientpositive/auto_sortmerge_join_7.q.out  |  242 +-
 .../clientpositive/auto_sortmerge_join_8.q.out  |  242 +-
 .../clientpositive/auto_sortmerge_join_9.q.out  | 1240 +-
 .../clientpositive/bucket_map_join_spark1.q.out |  206 +-
 .../clientpositive/bucket_map_join_spark2.q.out |  206 +-
 .../clientpositive/bucket_map_join_spark3.q.out |  206 +-
 .../clientpositive/bucket_map_join_spark4.q.out |  236 +-
 .../bucketsortoptimize_insert_2.q.out   |  218 +-
 .../bucketsortoptimize_insert_4.q.out   |  112 +-
 .../bucketsortoptimize_insert_5.q.out   |  142 +-
 .../bucketsortoptimize_insert_6.q.out   |  554 +++--
 .../bucketsortoptimize_insert_7.q.out   |   48 +-
 .../bucketsortoptimize_insert_8.q.out   |   76 +-
 .../cbo_rp_cross_product_check_2.q.out  |  468 ++--
 .../results/clientpositive/cbo_rp_join1.q.out   |   60 +-
 .../clientpositive/cbo_rp_lineage2.q.out|   18 +-
 .../clientpositive/column_access_stats.q.out|   84 +-
 .../results/clientpositive/constprog2.q.out |   20 +-
 .../clientpositive/constprog_partitioner.q.out  |   10 +-
 .../clientpositive/correlationoptimizer1.q.out  |  886 
 .../clientpositive/correlationoptimizer11.q.out |  120 +-
 .../clientpositive/correlationoptimizer13.q.out |   26 +-
 .../clientpositive/correlationoptimizer4.q.out  |  972 
 .../clientpositive/correlationoptimizer5.q.out  |  248 +-
 .../clientpositive/correlationoptimizer9.q.out  |  250 +-
 .../results/clientpositive/create_view.q.out|2 +-
 .../clientpositive/cross_product_check_1.q.out  |  252 ++-
 .../clientpositive/cross_product_check_2.q.out  |  332 +--
 .../results/clientpositive/decimal_join2.q.out  |   90 +-
 .../clientpositive/dynamic_rdd_cache.q.out  |  265 ++-
 .../encryption_join_unencrypted_tbl.q.out   |   98 +-
 ...on_join_with_different_encryption_keys.q.out |  102 +-
 .../clientpositive/explain_logical.q.out|  142 +-
 .../clientpositive/explain_rearrange.q.out  |  288 +--
 .../clientpositive/filter_join_breaktask.q.out  |   86 +-
 
