svn commit: r1838505 - /hive/cms/trunk/content/people.mdtext
Author: xuefu Date: Mon Aug 20 22:09:32 2018 New Revision: 1838505 URL: http://svn.apache.org/viewvc?rev=1838505=rev Log: Update Xuefu's org in the committer list Modified: hive/cms/trunk/content/people.mdtext Modified: hive/cms/trunk/content/people.mdtext URL: http://svn.apache.org/viewvc/hive/cms/trunk/content/people.mdtext?rev=1838505=1838504=1838505=diff == --- hive/cms/trunk/content/people.mdtext (original) +++ hive/cms/trunk/content/people.mdtext Mon Aug 20 22:09:32 2018 @@ -334,7 +334,7 @@ tr:nth-child(2n+1) { xuefu Xuefu Zhang - +https://www.alibaba.com/;>Alibaba Inc
hive git commit: HIVE-17257: Hive should merge empty files (Chao via Xuefu)
Repository: hive Updated Branches: refs/heads/master ad1243bef -> 9816cfb44 HIVE-17257: Hive should merge empty files (Chao via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9816cfb4 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9816cfb4 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9816cfb4 Branch: refs/heads/master Commit: 9816cfb44ad91a8c2a030e540a703983862e4123 Parents: ad1243b Author: Xuefu Zhang <xu...@uber.com> Authored: Thu Jan 18 11:26:54 2018 -0800 Committer: Xuefu Zhang <xu...@uber.com> Committed: Thu Jan 18 11:26:54 2018 -0800 -- .../ql/plan/ConditionalResolverMergeFiles.java | 2 +- .../test/queries/clientpositive/merge_empty.q | 14 ++ .../results/clientpositive/merge_empty.q.out| 45 3 files changed, 60 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/9816cfb4/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java index 129347b..ebf2298 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java @@ -408,7 +408,7 @@ public class ConditionalResolverMergeFiles implements ConditionalResolver, */ private long getMergeSize(FileSystem inpFs, Path dirPath, long avgSize) { AverageSize averageSize = getAverageSize(inpFs, dirPath); -if (averageSize.getTotalSize() <= 0) { +if (averageSize.getTotalSize() < 0) { return -1; } http://git-wip-us.apache.org/repos/asf/hive/blob/9816cfb4/ql/src/test/queries/clientpositive/merge_empty.q -- diff --git a/ql/src/test/queries/clientpositive/merge_empty.q b/ql/src/test/queries/clientpositive/merge_empty.q new file mode 100644 index 000..188b39e --- /dev/null +++ 
b/ql/src/test/queries/clientpositive/merge_empty.q @@ -0,0 +1,14 @@ +set hive.merge.mapredfiles=true; +set hive.merge.sparkfiles=true; +set hive.auto.convert.join=false; +set mapreduce.job.reduces=1000; + +create table dummy (a string); +insert overwrite directory '/tmp/test' select src.key from src join dummy on src.key = dummy.a; +dfs -ls /tmp/test; + +-- verify that this doesn't merge for bucketed tables +create table foo (a bigint, b string) clustered by (a) into 256 buckets; +create table bar (a bigint, b string); +insert overwrite table foo select * from bar; +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/foo; http://git-wip-us.apache.org/repos/asf/hive/blob/9816cfb4/ql/src/test/results/clientpositive/merge_empty.q.out -- diff --git a/ql/src/test/results/clientpositive/merge_empty.q.out b/ql/src/test/results/clientpositive/merge_empty.q.out new file mode 100644 index 000..c13cbf4 --- /dev/null +++ b/ql/src/test/results/clientpositive/merge_empty.q.out @@ -0,0 +1,45 @@ +PREHOOK: query: create table dummy (a string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dummy +POSTHOOK: query: create table dummy (a string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dummy + A masked pattern was here +PREHOOK: type: QUERY +PREHOOK: Input: default@dummy +PREHOOK: Input: default@src + A masked pattern was here +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dummy +POSTHOOK: Input: default@src + A masked pattern was here +PREHOOK: query: create table foo (a bigint, b string) clustered by (a) into 256 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo +POSTHOOK: query: create table foo (a bigint, b string) clustered by (a) into 256 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo +PREHOOK: query: create table bar (a bigint, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: 
database:default +PREHOOK: Output: default@bar +POSTHOOK: query: create table bar (a bigint, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bar +PREHOOK: query: insert overwrite table foo select * from bar +PREHOOK: type: QUERY +PREHOOK: Input: default@bar +PREHOOK: Output: default@foo +POSTHOOK: query: insert overwrite table foo select * from bar +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bar +POSTHOOK: Output: default@foo +POSTHOOK: Lineage: foo.a SIMPLE [(bar)bar.FieldSchem
[4/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_null_agg.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/subquery_null_agg.q.out b/ql/src/test/results/clientpositive/spark/subquery_null_agg.q.out new file mode 100644 index 000..144c3ec --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/subquery_null_agg.q.out @@ -0,0 +1,182 @@ +PREHOOK: query: CREATE TABLE table_7 (int_col INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_7 +POSTHOOK: query: CREATE TABLE table_7 (int_col INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_7 +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product +PREHOOK: query: explain +SELECT +(t1.int_col) * (t1.int_col) AS int_col +FROM ( +SELECT +MIN(NULL) OVER () AS int_col +FROM table_7 +) t1 +WHERE +(False) NOT IN (SELECT +False AS boolean_col +FROM ( +SELECT +MIN(NULL) OVER () AS int_col +FROM table_7 +) tt1 +WHERE +(t1.int_col) = (tt1.int_col)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT +(t1.int_col) * (t1.int_col) AS int_col +FROM ( +SELECT +MIN(NULL) OVER () AS int_col +FROM table_7 +) t1 +WHERE +(False) NOT IN (SELECT +False AS boolean_col +FROM ( +SELECT +MIN(NULL) OVER () AS int_col +FROM table_7 +) tt1 +WHERE +(t1.int_col) = (tt1.int_col)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Spark + Edges: +Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) +Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) +Reducer 5 <- Map 4 (GROUP, 1) +Reducer 7 <- Map 6 (GROUP, 2) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: 
+TableScan + alias: table_7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator +Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE +Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE +Map 4 +Map Operator Tree: +TableScan + alias: table_7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator +Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE +Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator +aggregations: count() +mode: hash +outputColumnNames: _col0 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) +Map 6 +Map Operator Tree: +TableScan + alias: table_7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator +Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE +Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator +keys: false (type: boolean) +mode: hash +outputColumnNames: _col0 +Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE +Reduce Output Operator + key expressions: _col0 (type: boolean) + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE +Reducer 2 +Reduce Operator Tree: + Join Operator +
[6/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_nested_subquery.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/subquery_nested_subquery.q.out b/ql/src/test/results/clientpositive/spark/subquery_nested_subquery.q.out new file mode 100644 index 000..15f33f0 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/subquery_nested_subquery.q.out @@ -0,0 +1,38 @@ +PREHOOK: query: select * +from part x +where x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where y.p_name = z.p_name)) +PREHOOK: type: QUERY +PREHOOK: Input: default@part + A masked pattern was here +POSTHOOK: query: select * +from part x +where x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where y.p_name = z.p_name)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part + A masked pattern was here +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN31 LG DRUM 1789.69 ickly ir +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN17 SM CASE 1671.68 are slyly after the sl +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +48427 almond antique violet mint lemonManufacturer#4 Brand#42 PROMO POLISHED STEEL39 SM CASE 1375.42 hely ironic i +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG1414.42 arefully +15103 almond aquamarine dodger light gainsboroManufacturer#5 Brand#53ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP 
CASE 1206.26 careful +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +17927 almond aquamarine yellow dodger mintManufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN2 JUMBO BOX 1173.15 e pinto beans h +17273 almond antique forest lavender goldenrodManufacturer#3 Brand#35PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +40982 almond antique misty red olive Manufacturer#3 Brand#32ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +155733 almond antique sky peru orange Manufacturer#5 Brand#53SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. 
bra +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
[8/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)
HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8eaf18d5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8eaf18d5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8eaf18d5 Branch: refs/heads/master Commit: 8eaf18d599909751efc4bb1e05d31e65da8a8d1e Parents: 1253450 Author: Xuefu Zhang <xu...@uber.com> Authored: Fri Oct 13 10:03:35 2017 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Fri Oct 13 10:03:35 2017 -0700 -- .../test/resources/testconfiguration.properties |8 + .../clientpositive/spark/subquery_multi.q.out | 4129 ++ .../spark/subquery_nested_subquery.q.out| 38 + .../clientpositive/spark/subquery_notin.q.out | 7722 ++ .../spark/subquery_null_agg.q.out | 182 + .../clientpositive/spark/subquery_scalar.q.out | 6619 +++ .../clientpositive/spark/subquery_select.q.out | 5379 .../spark/subquery_shared_alias.q.out | 23 + .../clientpositive/spark/subquery_views.q.out | 598 ++ 9 files changed, 24698 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/itests/src/test/resources/testconfiguration.properties -- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 1a7c0d2..65cd79a 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -1266,7 +1266,15 @@ spark.query.files=add_part_multiple.q, \ statsfs.q, \ subquery_exists.q, \ subquery_in.q, \ + subquery_multi.q,\ subquery_multiinsert.q, \ + subquery_nested_subquery.q, \ + subquery_notin.q,\ + subquery_null_agg.q,\ + subquery_scalar.q,\ + subquery_select.q, \ + subquery_shared_alias.q, \ + subquery_views.q,\ table_access_keys_stats.q, \ temp_table.q, \ temp_table_gb1.q, \
[3/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out new file mode 100644 index 000..470efca --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out @@ -0,0 +1,6619 @@ +PREHOOK: query: create table tnull(i int, c char(2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tnull +POSTHOOK: query: create table tnull(i int, c char(2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tnull +PREHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@tnull +POSTHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tnull +POSTHOOK: Lineage: tnull.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: tnull.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table tempty(c char(2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tempty +POSTHOOK: query: create table tempty(c char(2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tempty +PREHOOK: query: CREATE TABLE part_null( +p_partkey INT, +p_name STRING, +p_mfgr STRING, +p_brand STRING, +p_type STRING, +p_size INT, +p_container STRING, +p_retailprice DOUBLE, +p_comment STRING +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY "," +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_null +POSTHOOK: query: CREATE TABLE part_null( +p_partkey INT, +p_name STRING, +p_mfgr STRING, +p_brand STRING, +p_type 
STRING, +p_size INT, +p_container STRING, +p_retailprice DOUBLE, +p_comment STRING +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY "," +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_null +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@part_null +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@part_null +PREHOOK: query: insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith') +PREHOOK: type: QUERY +PREHOOK: Output: default@part_null +POSTHOOK: query: insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@part_null +POSTHOOK: Lineage: part_null.p_brand SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_comment SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col9, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_container SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col7, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_mfgr SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_name SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_partkey EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_retailprice 
EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col8, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_size EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_type SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: explain select * from part where p_size > (select avg(p_size) from part_null) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where p_size > (select avg(p_size) from part_null) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage +
[5/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_notin.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out new file mode 100644 index 000..d7b9a41 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out @@ -0,0 +1,7722 @@ +Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: explain +select * +from src +where src.key not in + ( select key from src s1 +where s1.key > '2' + ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src +where src.key not in + ( select key from src s1 +where s1.key > '2' + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Spark + Edges: +Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) +Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +Reducer 5 <- Map 4 (GROUP, 1) +Reducer 7 <- Map 6 (GROUP, 2) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: key (type: string), value (type: string) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) +Map 4 +Map Operator Tree: +TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator +predicate: (key > '2') (type: boolean) +Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE +Group By Operator 
+ aggregations: count(), count(key) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +sort order: +Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE +value expressions: _col0 (type: bigint), _col1 (type: bigint) +Map 6 +Map Operator Tree: +TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator +predicate: (key > '2') (type: boolean) +Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE +Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE +Reducer 2 +Reduce Operator Tree: + Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 + 1 +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 500 Data size: 13812 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 13812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint) +Reducer 3 +Reduce Operator Tree: + Join Operator +condition map: +
[7/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_multi.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out new file mode 100644 index 000..f9b2c1b --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out @@ -0,0 +1,4129 @@ +PREHOOK: query: create table tnull(i int, c char(2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tnull +POSTHOOK: query: create table tnull(i int, c char(2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tnull +PREHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@tnull +POSTHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tnull +POSTHOOK: Lineage: tnull.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: tnull.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table tempty(c char(2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tempty +POSTHOOK: query: create table tempty(c char(2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tempty +PREHOOK: query: CREATE TABLE part_null( +p_partkey INT, +p_name STRING, +p_mfgr STRING, +p_brand STRING, +p_type STRING, +p_size INT, +p_container STRING, +p_retailprice DOUBLE, +p_comment STRING +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY "," +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_null +POSTHOOK: query: CREATE TABLE part_null( +p_partkey INT, +p_name STRING, +p_mfgr STRING, +p_brand STRING, +p_type 
STRING, +p_size INT, +p_container STRING, +p_retailprice DOUBLE, +p_comment STRING +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY "," +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_null +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@part_null +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@part_null +PREHOOK: query: insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith') +PREHOOK: type: QUERY +PREHOOK: Output: default@part_null +POSTHOOK: query: insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@part_null +POSTHOOK: Lineage: part_null.p_brand SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_comment SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col9, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_container SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col7, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_mfgr SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_name SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_partkey EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_retailprice 
EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col8, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_size EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_type SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +PREHOOK: query: explain select * from part_null where p_size IN (select p_size from part_null) AND p_brand IN (select p_brand from part_null) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part_null where p_size IN (select p_size from part_null) AND p_brand IN (select p_brand from part_null) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0
[1/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)
Repository: hive Updated Branches: refs/heads/master 1253450e0 -> 8eaf18d59 http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_shared_alias.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/subquery_shared_alias.q.out b/ql/src/test/results/clientpositive/spark/subquery_shared_alias.q.out new file mode 100644 index 000..f907f91 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/subquery_shared_alias.q.out @@ -0,0 +1,23 @@ +PREHOOK: query: select * +from src +where src.key in (select key from src where key > '9') +PREHOOK: type: QUERY +PREHOOK: Input: default@src + A masked pattern was here +POSTHOOK: query: select * +from src +where src.key in (select key from src where key > '9') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src + A masked pattern was here +92 val_92 +96 val_96 +97 val_97 +97 val_97 +90 val_90 +90 val_90 +90 val_90 +95 val_95 +95 val_95 +98 val_98 +98 val_98 http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_views.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/subquery_views.q.out b/ql/src/test/results/clientpositive/spark/subquery_views.q.out new file mode 100644 index 000..9a1c25f --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/subquery_views.q.out @@ -0,0 +1,598 @@ +PREHOOK: query: create view cv1 as +select * +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9') +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@cv1 +POSTHOOK: query: create view cv1 as +select * +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9') +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cv1 +POSTHOOK: Lineage: cv1.key SIMPLE 
[(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: cv1.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe extended cv1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@cv1 +POSTHOOK: query: describe extended cv1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@cv1 +keystring +value string + + A masked pattern was here +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9'), viewExpandedText:select `b`.`key`, `b`.`value` +from `default`.`src` `b` +where exists + (select `a`.`key` + from `default`.`src` `a` + where `b`.`value` = `a`.`value` and `a`.`key` = `b`.`key` and `a`.`value` > 'val_9'), tableType:VIRTUAL_VIEW, rewriteEnabled:false) +PREHOOK: query: select * +from cv1 where cv1.key in (select key from cv1 c where c.key > '95') +PREHOOK: type: QUERY +PREHOOK: Input: default@cv1 +PREHOOK: Input: default@src + A masked pattern was here +POSTHOOK: query: select * +from cv1 where cv1.key in (select key from cv1 c where c.key > '95') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cv1 +POSTHOOK: Input: default@src + A masked pattern was here +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: create view cv2 as +select * +from src b +where b.key not in + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_11' + ) +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@cv2 +POSTHOOK: query: create view cv2 as +select * +from src b +where b.key not in + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_11' + ) +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cv2 +POSTHOOK: Lineage: cv2.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: cv2.value 
SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe extended cv2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@cv2 +POSTHOOK: query: describe extended cv2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@cv2 +keystring +value string + + A
[2/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_select.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/subquery_select.q.out b/ql/src/test/results/clientpositive/spark/subquery_select.q.out new file mode 100644 index 000..c3f3d58 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/subquery_select.q.out @@ -0,0 +1,5379 @@ +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: explain SELECT p_size, p_size IN ( +SELECT MAX(p_size) FROM part) +FROM part +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT p_size, p_size IN ( +SELECT MAX(p_size) FROM part) +FROM part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Spark + Edges: +Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) +Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +Reducer 5 <- Map 4 (GROUP, 1) +Reducer 7 <- Map 4 (GROUP, 1) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: p_size (type: int) +outputColumnNames: _col0 +Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) +Map 4 +Map Operator Tree: +TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: p_size (type: int) +outputColumnNames: p_size +Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE +Group By Operator + aggregations: max(p_size) + mode: hash + outputColumnNames: _col0 + 
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +sort order: +Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE +value expressions: _col0 (type: int) +Reducer 2 +Reduce Operator Tree: + Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 + 1 +outputColumnNames: _col0, _col1, _col2 +Statistics: Num rows: 26 Data size: 3589 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3589 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) +Reducer 3 +Reduce Operator Tree: + Join Operator +condition map: + Left Outer Join 0 to 1 +keys: + 0 _col0 (type: int) + 1 _col0 (type: int) +outputColumnNames: _col0, _col1, _col2, _col4 +Statistics: Num rows: 28 Data size: 3947 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: _col0 (type: int), CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (null) ELSE (false) END (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 3947 Basic stats: COMPLETE Column stats: NONE + File Output Operator +compressed: false +Statistics: Num rows: 28 Data size: 3947 Basic stats: COMPLETE Column stats: NONE +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde:
hive git commit: HIVE-17401: Hive session idle timeout doesn't function properly (Reviewed by Peter Vary)
Repository: hive Updated Branches: refs/heads/master 9a5381cb9 -> 660e39e03 HIVE-17401: Hive session idle timeout doesn't function properly (Reviewed by Peter Vary) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/660e39e0 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/660e39e0 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/660e39e0 Branch: refs/heads/master Commit: 660e39e03b68f6d256a6d4fd41193503a1f711c9 Parents: 9a5381c Author: Xuefu Zhang <xu...@uber.com> Authored: Wed Sep 6 10:06:01 2017 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Wed Sep 6 10:06:01 2017 -0700 -- .../service/cli/session/HiveSessionImpl.java| 26 1 file changed, 10 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/660e39e0/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java -- diff --git a/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java b/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java index 57bb53c..906565c 100644 --- a/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java +++ b/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java @@ -111,9 +111,8 @@ public class HiveSessionImpl implements HiveSession { // TODO: the control flow for this needs to be defined. Hive is supposed to be thread-local. 
private Hive sessionHive; - private volatile long lastAccessTime; - private volatile long lastIdleTime; - private volatile int activeCalls = 0; + private volatile long lastAccessTime = System.currentTimeMillis(); + private volatile boolean lockedByUser; private final Semaphore operationLock; @@ -184,7 +183,6 @@ public class HiveSessionImpl implements HiveSession { configureSession(sessionConfMap); } lastAccessTime = System.currentTimeMillis(); -lastIdleTime = lastAccessTime; } /** @@ -384,12 +382,11 @@ public class HiveSessionImpl implements HiveSession { sessionState.setIsUsingThriftJDBCBinarySerDe(updateIsUsingThriftJDBCBinarySerDe()); if (userAccess) { lastAccessTime = System.currentTimeMillis(); + lockedByUser = true; } // set the thread name with the logging prefix. sessionState.updateThreadName(); Hive.set(sessionHive); -activeCalls++; -lastIdleTime = 0; } /** @@ -424,12 +421,7 @@ public class HiveSessionImpl implements HiveSession { } if (userAccess) { lastAccessTime = System.currentTimeMillis(); -} -activeCalls--; -// lastIdleTime is only set by the last one -// who calls release with empty opHandleSet. -if (activeCalls == 0 && opHandleSet.isEmpty()) { - lastIdleTime = System.currentTimeMillis(); + lockedByUser = false; } } @@ -830,16 +822,18 @@ public class HiveSessionImpl implements HiveSession { @Override public long getNoOperationTime() { -return lastIdleTime > 0 ? System.currentTimeMillis() - lastIdleTime : 0; +boolean noMoreOpHandle = false; +synchronized (opHandleSet) { + noMoreOpHandle = opHandleSet.isEmpty(); +} +return noMoreOpHandle && !lockedByUser ? System.currentTimeMillis() - lastAccessTime : 0; } private void closeTimedOutOperations(List operations) { acquire(false, false); try { for (Operation operation : operations) { -synchronized (opHandleSet) { - opHandleSet.remove(operation.getHandle()); -} +removeOpHandle(operation.getHandle()); try { operation.close(); } catch (Exception e) {
hive git commit: HIVE-16961: Hive on Spark leaks spark application in case user cancels query and closes session (reviewed by Rui)
Repository: hive Updated Branches: refs/heads/master 26f1bdeb4 -> 0731dab18 HIVE-16961: Hive on Spark leaks spark application in case user cancels query and closes session (reviewed by Rui) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0731dab1 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0731dab1 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0731dab1 Branch: refs/heads/master Commit: 0731dab18c85363d4bad8a556c437a587277143c Parents: 26f1bde Author: Xuefu Zhang <xu...@uber.com> Authored: Wed Jul 5 10:33:18 2017 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Wed Jul 5 10:33:18 2017 -0700 -- .../org/apache/hive/spark/client/SparkClientImpl.java| 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/0731dab1/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java -- diff --git a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java index bf7e8db..03e773a 100644 --- a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java +++ b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java @@ -508,16 +508,19 @@ class SparkClientImpl implements SparkClient { } } - rpcServer.cancelClient(clientId, - "Child process exited before connecting back with error log " + errStr.toString()); LOG.warn("Child process exited with code {}", exitCode); + rpcServer.cancelClient(clientId, + "Child process (spark-submit) exited before connecting back with error log " + errStr.toString()); } } catch (InterruptedException ie) { -LOG.warn("Waiting thread interrupted, killing child process."); +LOG.warn("Thread waiting on the child process (spark-submit) is interrupted, killing the child process."); +rpcServer.cancelClient(clientId, "Thread waiting on the child porcess (spark-submit) is 
interrupted"); Thread.interrupted(); child.destroy(); } catch (Exception e) { -LOG.warn("Exception while waiting for child process.", e); +String errMsg = "Exception while waiting for child process (spark-submit)"; +LOG.warn(errMsg, e); +rpcServer.cancelClient(clientId, errMsg); } } };
hive git commit: HIVE-16962: Better error msg for Hive on Spark in case user cancels query and closes session (reviewed by Chao)
Repository: hive Updated Branches: refs/heads/master 539896482 -> 10944ee34 HIVE-16962: Better error msg for Hive on Spark in case user cancels query and closes session (reviewed by Chao) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/10944ee3 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/10944ee3 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/10944ee3 Branch: refs/heads/master Commit: 10944ee34a39efc0503ca917d1153751e1d495d2 Parents: 5398964 Author: Xuefu Zhang <xu...@uber.com> Authored: Thu Jun 29 10:01:05 2017 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Thu Jun 29 10:01:05 2017 -0700 -- .../hive/ql/exec/spark/session/SparkSessionImpl.java | 9 - .../org/apache/hive/spark/client/SparkClientImpl.java | 14 ++ 2 files changed, 18 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/10944ee3/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java index 51c6715..8224ef9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java @@ -56,13 +56,18 @@ public class SparkSessionImpl implements SparkSession { @Override public void open(HiveConf conf) throws HiveException { +LOG.info("Trying to open Spark session {}", sessionId); this.conf = conf; isOpen = true; try { hiveSparkClient = HiveSparkClientFactory.createHiveSparkClient(conf); } catch (Throwable e) { - throw new HiveException("Failed to create spark client.", e); + // It's possible that user session is closed while creating Spark client. + String msg = isOpen ? 
"Failed to create Spark client for Spark session " + sessionId : +"Spark Session " + sessionId + " is closed before Spark client is created"; + throw new HiveException(msg, e); } +LOG.info("Spark session {} is successfully opened", sessionId); } @Override @@ -121,10 +126,12 @@ public class SparkSessionImpl implements SparkSession { @Override public void close() { +LOG.info("Trying to close Spark session {}", sessionId); isOpen = false; if (hiveSparkClient != null) { try { hiveSparkClient.close(); +LOG.info("Spark session {} is successfully closed", sessionId); cleanScratchDir(); } catch (IOException e) { LOG.error("Failed to close spark session (" + sessionId + ").", e); http://git-wip-us.apache.org/repos/asf/hive/blob/10944ee3/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java -- diff --git a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java index e40aa6b..bf7e8db 100644 --- a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java +++ b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java @@ -107,19 +107,25 @@ class SparkClientImpl implements SparkClient { // The RPC server will take care of timeouts here. this.driverRpc = rpcServer.registerClient(clientId, secret, protocol).get(); } catch (Throwable e) { + String errorMsg = null; if (e.getCause() instanceof TimeoutException) { -LOG.error("Timed out waiting for client to connect.\nPossible reasons include network " + +errorMsg = "Timed out waiting for client to connect.\nPossible reasons include network " + "issues, errors in remote driver or the cluster has no available resources, etc." 
+ -"\nPlease check YARN or Spark driver's logs for further information.", e); +"\nPlease check YARN or Spark driver's logs for further information."; + } else if (e.getCause() instanceof InterruptedException) { +errorMsg = "Interruption occurred while waiting for client to connect.\nPossibly the Spark session is closed " + +"such as in case of query cancellation." + +"\nPlease refer to HiveServer2 logs for further information."; } else { -LOG.error("Error while waiting for client to connect.", e); +errorMsg = "Error while w
[1/2] hive git commit: HIVE-16799: Control the max number of task for a stage in a spark job (Reviewed by Rui)
Repository: hive Updated Branches: refs/heads/master 690a9f8e2 -> 788d486e8 HIVE-16799: Control the max number of task for a stage in a spark job (Reviewed by Rui) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/363ffe0a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/363ffe0a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/363ffe0a Branch: refs/heads/master Commit: 363ffe0ac7dec7e4804c1eb2ba76cb07660ae020 Parents: b560f49 Author: Xuefu Zhang <xu...@uber.com> Authored: Fri Jun 2 11:26:33 2017 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Fri Jun 2 11:26:33 2017 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 2 + .../test/resources/testconfiguration.properties | 3 +- .../hadoop/hive/cli/control/CliConfigs.java | 1 + .../hadoop/hive/ql/exec/spark/SparkTask.java| 2 +- .../spark/status/RemoteSparkJobMonitor.java | 14 .../ql/exec/spark/status/SparkJobMonitor.java | 12 +++ .../clientnegative/spark_stage_max_tasks.q | 6 ++ .../spark/spark_stage_max_tasks.q.out | 77 8 files changed, 115 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/363ffe0a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 176d36f..fce8db3 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3371,6 +3371,8 @@ public class HiveConf extends Configuration { "Turn this off when there is a memory issue."), SPARK_JOB_MAX_TASKS("hive.spark.job.max.tasks", -1, "The maximum number of tasks a Spark job may have.\n" + "If a Spark job contains more tasks than the maximum, it will be cancelled. 
A value of -1 means no limit."), +SPARK_STAGE_MAX_TASKS("hive.spark.stage.max.tasks", -1, "The maximum number of tasks a stage in a Spark job may have.\n" + +"If a Spark job stage contains more tasks than the maximum, the job will be cancelled. A value of -1 means no limit."), NWAYJOINREORDER("hive.reorder.nway.joins", true, "Runs reordering of tables within single n-way join (i.e.: picks streamtable)"), HIVE_MERGE_NWAY_JOINS("hive.merge.nway.joins", true, http://git-wip-us.apache.org/repos/asf/hive/blob/363ffe0a/itests/src/test/resources/testconfiguration.properties -- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index e613374..62462bd 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -1459,5 +1459,6 @@ spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\ groupby3_map_skew_multi_distinct.q,\ groupby3_multi_distinct.q,\ groupby_grouping_sets7.q,\ - spark_job_max_tasks.q + spark_job_max_tasks.q,\ + spark_stage_max_tasks.q http://git-wip-us.apache.org/repos/asf/hive/blob/363ffe0a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java -- diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java index 1457db0..27b87fb 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java @@ -288,6 +288,7 @@ public class CliConfigs { excludesFrom(testConfigProps, "minimr.query.negative.files"); excludeQuery("authorization_uri_import.q"); excludeQuery("spark_job_max_tasks.q"); +excludeQuery("spark_stage_max_tasks.q"); setResultsDir("ql/src/test/results/clientnegative"); setLogDir("itests/qtest/target/qfile-results/clientnegative"); 
http://git-wip-us.apache.org/repos/asf/hive/blob/363ffe0a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java index b4fb49f..2ee8c93 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java +++ b/ql/src/
[2/2] hive git commit: Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/hive
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/hive Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/788d486e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/788d486e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/788d486e Branch: refs/heads/master Commit: 788d486e8fbf58919d04f15e965050f1e885093f Parents: 363ffe0 690a9f8 Author: Xuefu Zhang <xu...@uber.com> Authored: Fri Jun 2 11:27:38 2017 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Fri Jun 2 11:27:38 2017 -0700 -- bin/ext/version.sh | 2 +- bin/hive| 20 ++-- .../hive/http/Log4j2ConfiguratorServlet.java| 18 +++--- .../hadoop/hive/metastore/HiveAlterHandler.java | 4 ++-- .../hadoop/hive/metastore/MetaStoreUtils.java | 14 +++--- .../hive/metastore/TestMetaStoreUtils.java | 16 +--- .../fast/VectorMapJoinFastHashTable.java| 3 ++- 7 files changed, 38 insertions(+), 39 deletions(-) --
hive git commit: HIVE-16456: Kill spark job when InterruptedException happens or driverContext.isShutdown is true (Zhihai via Xuefu)
Repository: hive Updated Branches: refs/heads/master 067d953bf -> 4ba48aa5f HIVE-16456: Kill spark job when InterruptedException happens or driverContext.isShutdown is true (Zhihai via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4ba48aa5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4ba48aa5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4ba48aa5 Branch: refs/heads/master Commit: 4ba48aa5fcaa981ee469161bbf17611aa0392fd2 Parents: 067d953 Author: Xuefu Zhang <xu...@uber.com> Authored: Tue May 9 09:40:13 2017 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Tue May 9 09:40:13 2017 -0700 -- .../hadoop/hive/ql/exec/spark/SparkTask.java| 32 +--- 1 file changed, 28 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/4ba48aa5/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java index 98b1605..b4fb49f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java @@ -85,6 +85,7 @@ public class SparkTask extends Task { private transient List stageIds; private transient SparkJobRef jobRef = null; private transient boolean isShutdown = false; + private transient boolean jobKilled = false; @Override public void initialize(QueryState queryState, QueryPlan queryPlan, DriverContext driverContext, @@ -112,6 +113,11 @@ public class SparkTask extends Task { jobRef = sparkSession.submit(driverContext, sparkWork); perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_SUBMIT_JOB); + if (driverContext.isShutdown()) { +killJob(); +throw new HiveException("Operation is cancelled."); + } + addToHistory(jobRef); sparkJobID = jobRef.getJobId(); this.jobID = jobRef.getSparkJobStatus().getAppID(); @@ -130,11 +136,11 @@ public 
class SparkTask extends Task { // ideally also cancel the app request here. But w/o facilities from Spark or YARN, // it's difficult to do it on hive side alone. See HIVE-12650. LOG.info("Failed to submit Spark job " + sparkJobID); -jobRef.cancelJob(); +killJob(); } else if (rc == 4) { LOG.info("The number of tasks reaches above the limit " + conf.getIntVar(HiveConf.ConfVars.SPARK_JOB_MAX_TASKS) + ". Cancelling Spark job " + sparkJobID + " with application ID " + jobID ); -jobRef.cancelJob(); +killJob(); } if (this.jobID == null) { @@ -305,14 +311,27 @@ public class SparkTask extends Task { @Override public void shutdown() { super.shutdown(); -if (jobRef != null && !isShutdown) { +killJob(); +isShutdown = true; + } + + private void killJob() { +boolean needToKillJob = false; +if (jobRef != null && !jobKilled) { + synchronized (this) { +if (!jobKilled) { + jobKilled = true; + needToKillJob = true; +} + } +} +if (needToKillJob) { try { jobRef.cancelJob(); } catch (Exception e) { LOG.warn("failed to kill job", e); } } -isShutdown = true; } /** @@ -393,6 +412,11 @@ public class SparkTask extends Task { if (rc != 0) { Throwable error = sparkJobStatus.getError(); if (error != null) { + if ((error instanceof InterruptedException) || + (error instanceof HiveException && + error.getCause() instanceof InterruptedException)) { +killJob(); + } setException(error); } }
hive git commit: HIVE-16552: Limit the number of tasks a Spark job may contain (Reviewed by Rui)
Repository: hive Updated Branches: refs/heads/master 9e9356b5e -> c6b5ad663 HIVE-16552: Limit the number of tasks a Spark job may contain (Reviewed by Rui) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c6b5ad66 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c6b5ad66 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c6b5ad66 Branch: refs/heads/master Commit: c6b5ad663d235c15fc5bb5a24a1d3e9ac0d05140 Parents: 9e9356b Author: Xuefu Zhang <xu...@uber.com> Authored: Thu May 4 09:31:28 2017 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Thu May 4 09:31:28 2017 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 2 + .../test/resources/testconfiguration.properties | 4 +- .../hadoop/hive/cli/control/CliConfigs.java | 1 + .../hadoop/hive/ql/exec/spark/SparkTask.java| 6 ++ .../spark/status/RemoteSparkJobMonitor.java | 15 +++- .../ql/exec/spark/status/SparkJobMonitor.java | 10 ++- .../clientnegative/spark_job_max_tasks.q| 6 ++ .../spark/spark_job_max_tasks.q.out | 77 8 files changed, 118 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/c6b5ad66/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 84398c6..99c26ce 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3350,6 +3350,8 @@ public class HiveConf extends Configuration { "hive.spark.use.groupby.shuffle", true, "Spark groupByKey transformation has better performance but uses unbounded memory." + "Turn this off when there is a memory issue."), +SPARK_JOB_MAX_TASKS("hive.spark.job.max.tasks", -1, "The maximum number of tasks a Spark job may have.\n" + +"If a Spark job contains more tasks than the maximum, it will be cancelled. 
A value of -1 means no limit."), NWAYJOINREORDER("hive.reorder.nway.joins", true, "Runs reordering of tables within single n-way join (i.e.: picks streamtable)"), HIVE_MERGE_NWAY_JOINS("hive.merge.nway.joins", true, http://git-wip-us.apache.org/repos/asf/hive/blob/c6b5ad66/itests/src/test/resources/testconfiguration.properties -- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 753f3a9..5ab3076 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -1445,4 +1445,6 @@ spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\ groupby2_multi_distinct.q,\ groupby3_map_skew_multi_distinct.q,\ groupby3_multi_distinct.q,\ - groupby_grouping_sets7.q + groupby_grouping_sets7.q,\ + spark_job_max_tasks.q + http://git-wip-us.apache.org/repos/asf/hive/blob/c6b5ad66/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java -- diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java index 67064b8..1457db0 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java @@ -287,6 +287,7 @@ public class CliConfigs { excludesFrom(testConfigProps, "minimr.query.negative.files"); excludeQuery("authorization_uri_import.q"); +excludeQuery("spark_job_max_tasks.q"); setResultsDir("ql/src/test/results/clientnegative"); setLogDir("itests/qtest/target/qfile-results/clientnegative"); http://git-wip-us.apache.org/repos/asf/hive/blob/c6b5ad66/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java index 32a7730..98b1605 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java @@ -129,8 +129,14 @@ public class SparkTask extends Task { // TODO: If the timeout is because of lack of resources in the cluste
[1/3] hive git commit: HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)
Repository: hive Updated Branches: refs/heads/master 812fa3946 -> 00b644482 http://git-wip-us.apache.org/repos/asf/hive/blob/00b64448/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out new file mode 100644 index 000..ca0910a --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out @@ -0,0 +1,5921 @@ +PREHOOK: query: explain create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +PREHOOK: type: CREATETABLE +POSTHOOK: query: explain create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +POSTHOOK: type: CREATETABLE +Stage-0 + Create Table Operator: +name:default.src_orc_merge_test_part + +PREHOOK: query: create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: query: create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_orc_merge_test_part +PREHOOK: query: alter table src_orc_merge_test_part add partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: query: alter table src_orc_merge_test_part add partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: Output: default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +PREHOOK: query: desc extended src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31') 
+PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_orc_merge_test_part +POSTHOOK: query: desc extended src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_orc_merge_test_part +keyint +value string +ds string +ts string + +# Partition Information +# col_name data_type comment + +ds string +ts string + + A masked pattern was here +PREHOOK: query: explain insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Stage-2 + Stats-Aggr Operator +Stage-0 + Move Operator +table:{"name:":"default.src_orc_merge_test_part"} +Stage-1 + Map 1 + File Output Operator [FS_3] +table:{"name:":"default.src_orc_merge_test_part"} +Select Operator [SEL_1] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=10) +default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + +PREHOOK: query: insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +POSTHOOK: query: insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +POSTHOOK: Lineage: src_orc_merge_test_part PARTITION(ds=2012-01-03,ts=2012-01-03+14:46:31).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test_part PARTITION(ds=2012-01-03,ts=2012-01-03+14:46:31).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain insert into table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into table src_orc_merge_test_part partition(ds='2012-01-03',
[3/3] hive git commit: HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)
HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/00b64448 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/00b64448 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/00b64448 Branch: refs/heads/master Commit: 00b644482656da9fb40788744e692f4e677b4c0d Parents: 812fa39 Author: Xuefu Zhang <xu...@uber.com> Authored: Tue May 2 10:28:37 2017 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Tue May 2 10:28:37 2017 -0700 -- .../hive/common/jsonexplain/Connection.java | 35 + .../hive/common/jsonexplain/DagJsonParser.java | 167 + .../common/jsonexplain/DagJsonParserUtils.java | 53 + .../common/jsonexplain/JsonParserFactory.java |4 + .../hadoop/hive/common/jsonexplain/Op.java | 358 ++ .../hadoop/hive/common/jsonexplain/Printer.java | 41 + .../hadoop/hive/common/jsonexplain/Stage.java | 262 + .../hadoop/hive/common/jsonexplain/Vertex.java | 323 + .../jsonexplain/spark/SparkJsonParser.java | 35 + .../hive/common/jsonexplain/tez/Connection.java | 35 - .../hadoop/hive/common/jsonexplain/tez/Op.java | 356 -- .../hive/common/jsonexplain/tez/Printer.java| 41 - .../hive/common/jsonexplain/tez/Stage.java | 262 - .../common/jsonexplain/tez/TezJsonParser.java | 153 +- .../jsonexplain/tez/TezJsonParserUtils.java | 53 - .../hive/common/jsonexplain/tez/Vertex.java | 334 - .../org/apache/hadoop/hive/conf/HiveConf.java |5 +- .../test/resources/testconfiguration.properties |1 + .../hadoop/hive/ql/optimizer/Optimizer.java |2 +- .../hive/ql/parse/ExplainSemanticAnalyzer.java | 16 +- .../apache/hadoop/hive/ql/plan/SparkWork.java | 10 +- .../clientpositive/spark_explainuser_1.q| 671 ++ .../spark/spark_explainuser_1.q.out | 5921 ++ 23 files changed, 7915 insertions(+), 1223 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/00b64448/common/src/java/org/apache/hadoop/hive/common/jsonexplain/Connection.java -- diff --git 
a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/Connection.java b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/Connection.java new file mode 100644 index 000..0df6f4c --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/Connection.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.common.jsonexplain; + +public final class Connection implements Comparable<Connection>{ + public final String type; + public final Vertex from; + + public Connection(String type, Vertex from) { +super(); +this.type = type; +this.from = from; + } + + @Override + public int compareTo(Connection o) { +return from.compareTo(o.from); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/00b64448/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java -- diff --git a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java new file mode 100644 index 000..1f01685 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to
[2/3] hive git commit: HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)
http://git-wip-us.apache.org/repos/asf/hive/blob/00b64448/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java -- diff --git a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java index ea86048..294dc6b 100644 --- a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java +++ b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java @@ -18,146 +18,29 @@ package org.apache.hadoop.hive.common.jsonexplain.tez; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; +import org.apache.hadoop.hive.common.jsonexplain.DagJsonParser; -import org.apache.hadoop.hive.common.jsonexplain.JsonParser; -import org.json.JSONObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -public final class TezJsonParser implements JsonParser { - public final Mapstages = new LinkedHashMap<>(); - protected final Logger LOG; - // the objects that have been printed. - public final Set printSet = new LinkedHashSet<>(); - // the vertex that should be inlined. - public final Map inlineMap = new LinkedHashMap<>(); - - public TezJsonParser() { -super(); -LOG = LoggerFactory.getLogger(this.getClass().getName()); - } - - public void extractStagesAndPlans(JSONObject inputObject) throws Exception { -// extract stages -JSONObject dependency = inputObject.getJSONObject("STAGE DEPENDENCIES"); -if (dependency != null && dependency.length() > 0) { - // iterate for the first time to get all the names of stages. - for (String stageName : JSONObject.getNames(dependency)) { -this.stages.put(stageName, new Stage(stageName, this)); - } - // iterate for the second time to get all the dependency. 
- for (String stageName : JSONObject.getNames(dependency)) { -JSONObject dependentStageNames = dependency.getJSONObject(stageName); -this.stages.get(stageName).addDependency(dependentStageNames, this.stages); - } -} -// extract stage plans -JSONObject stagePlans = inputObject.getJSONObject("STAGE PLANS"); -if (stagePlans != null && stagePlans.length() > 0) { - for (String stageName : JSONObject.getNames(stagePlans)) { -JSONObject stagePlan = stagePlans.getJSONObject(stageName); -this.stages.get(stageName).extractVertex(stagePlan); - } -} - } - - /** - * @param indentFlag - * help to generate correct indent - * @return - */ - public static String prefixString(int indentFlag) { -StringBuilder sb = new StringBuilder(); -for (int index = 0; index < indentFlag; index++) { - sb.append(" "); -} -return sb.toString(); - } - - /** - * @param indentFlag - * @param tail - * help to generate correct indent with a specific tail - * @return - */ - public static String prefixString(int indentFlag, String tail) { -StringBuilder sb = new StringBuilder(); -for (int index = 0; index < indentFlag; index++) { - sb.append(" "); -} -int len = sb.length(); -return sb.replace(len - tail.length(), len, tail).toString(); - } +public class TezJsonParser extends DagJsonParser { @Override - public void print(JSONObject inputObject, PrintStream outputStream) throws Exception { -LOG.info("JsonParser is parsing:" + inputObject.toString()); -this.extractStagesAndPlans(inputObject); -Printer printer = new Printer(); -// print out the cbo info -if (inputObject.has("cboInfo")) { - printer.println(inputObject.getString("cboInfo")); - printer.println(); -} -// print out the vertex dependency in root stage -for (Stage candidate : this.stages.values()) { - if (candidate.tezStageDependency != null && candidate.tezStageDependency.size() > 0) { -printer.println("Vertex dependency in root stage"); -for (Entry entry : candidate.tezStageDependency.entrySet()) { - StringBuilder sb = new StringBuilder(); - 
sb.append(entry.getKey().name); - sb.append(" <- "); - boolean printcomma = false; - for (Connection connection : entry.getValue()) { -if (printcomma) { - sb.append(", "); -} else { - printcomma = true; -} -sb.append(connection.from.name + " (" + connection.type + ")"); - } - printer.println(sb.toString()); -} -printer.println(); - } + public String mapEdgeType(String edgeName) { +switch (edgeName) { + case "BROADCAST_EDGE": +
hive git commit: HIVE-16524: Remove the redundant item type in hiveserver2.jsp and QueryProfileTmpl.jamon (ZhangBing via Xuefu)
Repository: hive Updated Branches: refs/heads/master 79e3c5a8d -> 5ab03cba5 HIVE-16524: Remove the redundant item type in hiveserver2.jsp and QueryProfileTmpl.jamon (ZhangBing via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5ab03cba Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5ab03cba Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5ab03cba Branch: refs/heads/master Commit: 5ab03cba5999de0c95e24aafad074099231297bc Parents: 79e3c5a Author: Xuefu Zhang <xu...@uber.com> Authored: Mon May 1 18:27:53 2017 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Mon May 1 18:27:53 2017 -0700 -- .../src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon | 10 +- .../resources/hive-webapps/hiveserver2/hiveserver2.jsp| 8 2 files changed, 9 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/5ab03cba/service/src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon -- diff --git a/service/src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon b/service/src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon index 07aa3c1..fa69eb2 100644 --- a/service/src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon +++ b/service/src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon @@ -169,7 +169,7 @@ org.apache.hive.service.cli.operation.SQLOperationDisplay; <%args> SQLOperationDisplay sod; - + Stage Id Status @@ -218,7 +218,7 @@ org.apache.hive.service.cli.operation.SQLOperationDisplay; Compile-time metadata operations - + Call Name Time (ms) @@ -237,7 +237,7 @@ org.apache.hive.service.cli.operation.SQLOperationDisplay; Execution-time metadata operations - + Call Name Time (ms) @@ -256,7 +256,7 @@ org.apache.hive.service.cli.operation.SQLOperationDisplay; Compile-Time Perf-Logger - + Compile-time Call Name Time (ms) @@ -275,7 +275,7 @@ org.apache.hive.service.cli.operation.SQLOperationDisplay; Execution-Time Perf-Logger - + Execution-time Call Name Time (ms) 
http://git-wip-us.apache.org/repos/asf/hive/blob/5ab03cba/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp -- diff --git a/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp b/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp index 33797ed..0e0803b 100644 --- a/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp +++ b/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp @@ -98,7 +98,7 @@ if (sessionManager != null) { Active Sessions - + User Name IP Address @@ -128,7 +128,7 @@ for (HiveSession hiveSession: hiveSessions) { Open Queries - + User Name Query @@ -169,7 +169,7 @@ for (HiveSession hiveSession: hiveSessions) { Last Max <%= conf.get(ConfVars.HIVE_SERVER2_WEBUI_MAX_HISTORIC_QUERIES.varname) %> Closed Queries - + User Name Query @@ -213,7 +213,7 @@ for (HiveSession hiveSession: hiveSessions) { Software Attributes - + Attribute Name Value
[2/2] hive git commit: HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)
HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/79e3c5a8 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/79e3c5a8 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/79e3c5a8 Branch: refs/heads/master Commit: 79e3c5a8d10e60ae1a981e74b0c48011d3fb2cdc Parents: 62fbdd8 Author: Xuefu Zhang <xu...@uber.com> Authored: Mon May 1 18:16:27 2017 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Mon May 1 18:16:27 2017 -0700 -- .../hive/common/jsonexplain/DagJsonParser.java | 167 + .../common/jsonexplain/JsonParserFactory.java |4 + .../jsonexplain/spark/SparkJsonParser.java | 35 + .../hive/common/jsonexplain/tez/Connection.java |2 +- .../hadoop/hive/common/jsonexplain/tez/Op.java | 54 +- .../hive/common/jsonexplain/tez/Printer.java|2 +- .../hive/common/jsonexplain/tez/Stage.java | 20 +- .../common/jsonexplain/tez/TezJsonParser.java | 153 +- .../jsonexplain/tez/TezJsonParserUtils.java |6 +- .../hive/common/jsonexplain/tez/Vertex.java | 87 +- .../org/apache/hadoop/hive/conf/HiveConf.java |5 +- .../test/resources/testconfiguration.properties |1 + .../hadoop/hive/ql/optimizer/Optimizer.java |2 +- .../hive/ql/parse/ExplainSemanticAnalyzer.java | 16 +- .../apache/hadoop/hive/ql/plan/SparkWork.java | 10 +- .../clientpositive/spark_explainuser_1.q| 671 ++ .../spark/spark_explainuser_1.q.out | 5921 ++ 17 files changed, 6924 insertions(+), 232 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/79e3c5a8/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java -- diff --git a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java new file mode 100644 index 000..1f01685 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java @@ -0,0 +1,167 @@ +/** + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.jsonexplain; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.hadoop.hive.common.jsonexplain.JsonParser; +import org.json.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class DagJsonParser implements JsonParser { + public final Map<String, Stage> stages = new LinkedHashMap<>(); + protected final Logger LOG; + // the objects that have been printed. + public final Set printSet = new LinkedHashSet<>(); + // the vertex that should be inlined. 
<Operator, list of Vertex that is + // inlined> + public final Map<Op, List> inlineMap = new LinkedHashMap<>(); + + public DagJsonParser() { +super(); +LOG = LoggerFactory.getLogger(this.getClass().getName()); + } + + public void extractStagesAndPlans(JSONObject inputObject) throws Exception { +// extract stages +JSONObject dependency = inputObject.getJSONObject("STAGE DEPENDENCIES"); +if (dependency != null && dependency.length() > 0) { + // iterate for the first time to get all the names of stages. + for (String stageName : JSONObject.getNames(dependency)) { +this.stages.put(stageName, new Stage(stageName, this)); + } + // iterate for the second time to get all the dependency. + for (String stageName : JSONObject.getNames(dependency)) { +JSONObject dependentStageNames = dependency.getJSONObject(stageName); +this.stages.get(stageName).addDependency(dependentStageNames, this.stages); + } +} +// extract stage plans +JSON
[1/2] hive git commit: HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)
Repository: hive Updated Branches: refs/heads/master 62fbdd86e -> 79e3c5a8d http://git-wip-us.apache.org/repos/asf/hive/blob/79e3c5a8/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out new file mode 100644 index 000..ca0910a --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out @@ -0,0 +1,5921 @@ +PREHOOK: query: explain create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +PREHOOK: type: CREATETABLE +POSTHOOK: query: explain create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +POSTHOOK: type: CREATETABLE +Stage-0 + Create Table Operator: +name:default.src_orc_merge_test_part + +PREHOOK: query: create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: query: create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_orc_merge_test_part +PREHOOK: query: alter table src_orc_merge_test_part add partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: query: alter table src_orc_merge_test_part add partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: Output: default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +PREHOOK: query: desc extended src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31') 
+PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_orc_merge_test_part +POSTHOOK: query: desc extended src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_orc_merge_test_part +keyint +value string +ds string +ts string + +# Partition Information +# col_name data_type comment + +ds string +ts string + + A masked pattern was here +PREHOOK: query: explain insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Stage-2 + Stats-Aggr Operator +Stage-0 + Move Operator +table:{"name:":"default.src_orc_merge_test_part"} +Stage-1 + Map 1 + File Output Operator [FS_3] +table:{"name:":"default.src_orc_merge_test_part"} +Select Operator [SEL_1] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=10) +default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + +PREHOOK: query: insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +POSTHOOK: query: insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +POSTHOOK: Lineage: src_orc_merge_test_part PARTITION(ds=2012-01-03,ts=2012-01-03+14:46:31).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test_part PARTITION(ds=2012-01-03,ts=2012-01-03+14:46:31).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain insert into table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into table src_orc_merge_test_part partition(ds='2012-01-03',
hive git commit: HIVE-12614: RESET command does not close spark session (Sahil via Xuefu)
Repository: hive Updated Branches: refs/heads/master fb3df4641 -> 1ed36f042 HIVE-12614: RESET command does not close spark session (Sahil via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1ed36f04 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1ed36f04 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1ed36f04 Branch: refs/heads/master Commit: 1ed36f0428d53303d02ddd5c8a3a6c7f8db9e19a Parents: fb3df46 Author: Xuefu Zhang <xu...@uber.com> Authored: Tue Apr 25 14:09:35 2017 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Tue Apr 25 14:10:39 2017 -0700 -- .../hive/ql/processors/ResetProcessor.java | 21 --- .../hadoop/hive/ql/processors/SetProcessor.java | 15 +++-- .../hive/ql/processors/TestResetProcessor.java | 59 3 files changed, 82 insertions(+), 13 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/1ed36f04/ql/src/java/org/apache/hadoop/hive/ql/processors/ResetProcessor.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/processors/ResetProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/processors/ResetProcessor.java index bbd4501..b40879d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/processors/ResetProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/processors/ResetProcessor.java @@ -23,7 +23,11 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; + import org.apache.commons.lang3.StringUtils; + import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveVariableSource; import org.apache.hadoop.hive.conf.SystemVariables; @@ -33,7 +37,6 @@ import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; import org.apache.hadoop.hive.ql.session.SessionState; -import com.google.common.collect.Lists; public class ResetProcessor 
implements CommandProcessor { @@ -45,8 +48,11 @@ public class ResetProcessor implements CommandProcessor { @Override public CommandProcessorResponse run(String command) throws CommandNeedRetryException { -SessionState ss = SessionState.get(); +return run(SessionState.get(), command); + } + @VisibleForTesting + CommandProcessorResponse run(SessionState ss, String command) throws CommandNeedRetryException { CommandProcessorResponse authErrResp = CommandUtil.authorizeCommand(ss, HiveOperationType.RESET, Arrays.asList(command)); if (authErrResp != null) { @@ -88,7 +94,7 @@ public class ResetProcessor implements CommandProcessor { ? Lists.newArrayList("Resetting " + message + " to default values") : null); } - private void resetOverridesOnly(SessionState ss) { + private static void resetOverridesOnly(SessionState ss) { if (ss.getOverriddenConfigurations().isEmpty()) return; HiveConf conf = new HiveConf(); for (String key : ss.getOverriddenConfigurations().keySet()) { @@ -97,21 +103,20 @@ public class ResetProcessor implements CommandProcessor { ss.getOverriddenConfigurations().clear(); } - private void resetOverrideOnly(SessionState ss, String varname) { + private static void resetOverrideOnly(SessionState ss, String varname) { if (!ss.getOverriddenConfigurations().containsKey(varname)) return; setSessionVariableFromConf(ss, varname, new HiveConf()); ss.getOverriddenConfigurations().remove(varname); } - private void setSessionVariableFromConf(SessionState ss, String varname, - HiveConf conf) { + private static void setSessionVariableFromConf(SessionState ss, String varname, HiveConf conf) { String value = conf.get(varname); if (value != null) { - ss.getConf().set(varname, value); + SetProcessor.setConf(ss, varname, varname, value, false); } } - private CommandProcessorResponse resetToDefault(SessionState ss, String varname) { + private static CommandProcessorResponse resetToDefault(SessionState ss, String varname) { varname = varname.trim(); try { String nonErrorMessage 
= null; http://git-wip-us.apache.org/repos/asf/hive/blob/1ed36f04/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java index 0ffa182..1458211 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java
hive git commit: HIVE-16430: Add log to show the cancelled query id when cancelOperation is called (Zhihai via Xuefu)
Repository: hive Updated Branches: refs/heads/master 794cfa35a -> 4deefcd50 HIVE-16430: Add log to show the cancelled query id when cancelOperation is called (Zhihai via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4deefcd5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4deefcd5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4deefcd5 Branch: refs/heads/master Commit: 4deefcd50433c29dcf7cb4d1e422097b9cfcbca1 Parents: 794cfa3 Author: Xuefu Zhang <xu...@uber.com> Authored: Fri Apr 14 06:59:46 2017 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Fri Apr 14 06:59:46 2017 -0700 -- .../apache/hive/service/cli/operation/SQLOperation.java | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/4deefcd5/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java -- diff --git a/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java b/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java index 04fc0a1..d9bfba87 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ b/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java @@ -398,9 +398,11 @@ public class SQLOperation extends ExecuteStatementOperation { Future backgroundHandle = getBackgroundHandle(); if (backgroundHandle != null) { boolean success = backgroundHandle.cancel(true); +String queryId = confOverlay.get(HiveConf.ConfVars.HIVEQUERYID.varname); if (success) { - String queryId = confOverlay.get(HiveConf.ConfVars.HIVEQUERYID.varname); LOG.info("The running operation has been successfully interrupted: " + queryId); +} else if (state == OperationState.CANCELED) { + LOG.info("The running operation could not be cancelled, typically because it has already completed normally: " + queryId); } } } @@ -427,8 +429,16 @@ public class SQLOperation extends 
ExecuteStatementOperation { @Override public void cancel(OperationState stateAfterCancel) throws HiveSQLException { +String queryId = null; +if (stateAfterCancel == OperationState.CANCELED) { + queryId = confOverlay.get(HiveConf.ConfVars.HIVEQUERYID.varname); + LOG.info("Cancelling the query execution: " + queryId); +} cleanup(stateAfterCancel); cleanupOperationLog(); +if (stateAfterCancel == OperationState.CANCELED) { + LOG.info("Successfully cancelled the query: " + queryId); +} } @Override
hive git commit: HIVE-16286: Log canceled query id (Jimmy via Xuefu)
Repository: hive Updated Branches: refs/heads/master 736d2e861 -> 8fa9d5833 HIVE-16286: Log canceled query id (Jimmy via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8fa9d583 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8fa9d583 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8fa9d583 Branch: refs/heads/master Commit: 8fa9d5833c5c4fda092a917c9881db8fbfea5332 Parents: 736d2e8 Author: Xuefu Zhang <xu...@uber.com> Authored: Thu Mar 23 19:36:16 2017 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Thu Mar 23 19:36:16 2017 -0700 -- .../org/apache/hive/service/cli/operation/SQLOperation.java| 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/8fa9d583/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java -- diff --git a/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java b/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java index ff389ac..f41092e 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ b/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java @@ -172,8 +172,9 @@ public class SQLOperation extends ExecuteStatementOperation { @Override public void run() { try { + String queryId = confOverlay.get(HiveConf.ConfVars.HIVEQUERYID.varname); LOG.info("Query timed out after: " + queryTimeout - + " seconds. Cancelling the execution now."); + + " seconds. 
Cancelling the execution now: " + queryId); SQLOperation.this.cancel(OperationState.TIMEDOUT); } catch (HiveSQLException e) { LOG.error("Error cancelling the query after timeout: " + queryTimeout + " seconds", e); @@ -412,7 +413,8 @@ public class SQLOperation extends ExecuteStatementOperation { if (backgroundHandle != null) { boolean success = backgroundHandle.cancel(true); if (success) { - LOG.info("The running operation has been successfully interrupted."); + String queryId = confOverlay.get(HiveConf.ConfVars.HIVEQUERYID.varname); + LOG.info("The running operation has been successfully interrupted: " + queryId); } } }
[1/2] hive git commit: HIVE-16183: Fix potential thread safety issues with static variables (reviewed by rui, sergey, and peter)
Repository: hive Updated Branches: refs/heads/master 1f7e26ff2 -> 71f4930d9 http://git-wip-us.apache.org/repos/asf/hive/blob/71f4930d/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java -- diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java index 7a565dd..f733c1e 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java +++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java @@ -145,7 +145,6 @@ public class FastHiveDecimalImpl extends FastHiveDecimal { * Int: 8 decimal digits. An even number and 1/2 of MAX_LONGWORD_DECIMAL. */ private static final int INTWORD_DECIMAL_DIGITS = 8; - private static final int MAX_INTWORD_DECIMAL = (int) powerOfTenTable[INTWORD_DECIMAL_DIGITS] - 1; private static final int MULTIPLER_INTWORD_DECIMAL = (int) powerOfTenTable[INTWORD_DECIMAL_DIGITS]; /** @@ -164,9 +163,6 @@ public class FastHiveDecimalImpl extends FastHiveDecimal { private static final long MAX_HIGHWORD_DECIMAL = powerOfTenTable[HIGHWORD_DECIMAL_DIGITS] - 1; - private static long HIGHWORD_DIVIDE_FACTOR = powerOfTenTable[LONGWORD_DECIMAL_DIGITS - HIGHWORD_DECIMAL_DIGITS]; - private static long HIGHWORD_MULTIPLY_FACTOR = powerOfTenTable[HIGHWORD_DECIMAL_DIGITS]; - // 38 * 2 or 76 full decimal maximum - (64 + 8) digits in 4 lower longs (4 digits here). 
private static final long FULL_MAX_HIGHWORD_DECIMAL = powerOfTenTable[MAX_DECIMAL_DIGITS * 2 - (FOUR_X_LONGWORD_DECIMAL_DIGITS + INTWORD_DECIMAL_DIGITS)] - 1; @@ -189,11 +185,6 @@ public class FastHiveDecimalImpl extends FastHiveDecimal { BigInteger.ONE.add(BIG_INTEGER_MAX_LONGWORD_DECIMAL); private static final BigInteger BIG_INTEGER_LONGWORD_MULTIPLIER_2X = BIG_INTEGER_LONGWORD_MULTIPLIER.multiply(BIG_INTEGER_LONGWORD_MULTIPLIER); - private static final BigInteger BIG_INTEGER_LONGWORD_MULTIPLIER_3X = - BIG_INTEGER_LONGWORD_MULTIPLIER_2X.multiply(BIG_INTEGER_LONGWORD_MULTIPLIER); - private static final BigInteger BIG_INTEGER_LONGWORD_MULTIPLIER_4X = - BIG_INTEGER_LONGWORD_MULTIPLIER_3X.multiply(BIG_INTEGER_LONGWORD_MULTIPLIER); - private static final BigInteger BIG_INTEGER_MAX_HIGHWORD_DECIMAL = BigInteger.valueOf(MAX_HIGHWORD_DECIMAL); private static final BigInteger BIG_INTEGER_HIGHWORD_MULTIPLIER = @@ -203,21 +194,21 @@ public class FastHiveDecimalImpl extends FastHiveDecimal { // conversion. // There is only one blank in UTF-8. - private final static byte BYTE_BLANK = (byte) ' '; + private static final byte BYTE_BLANK = (byte) ' '; - private final static byte BYTE_DIGIT_ZERO = (byte) '0'; - private final static byte BYTE_DIGIT_NINE = (byte) '9'; + private static final byte BYTE_DIGIT_ZERO = (byte) '0'; + private static final byte BYTE_DIGIT_NINE = (byte) '9'; // Decimal point. - private final static byte BYTE_DOT = (byte) '.'; + private static final byte BYTE_DOT = (byte) '.'; // Sign. - private final static byte BYTE_MINUS = (byte) '-'; - private final static byte BYTE_PLUS = (byte) '+'; + private static final byte BYTE_MINUS = (byte) '-'; + private static final byte BYTE_PLUS = (byte) '+'; // Exponent E or e. 
- private final static byte BYTE_EXPONENT_LOWER = (byte) 'e'; - private final static byte BYTE_EXPONENT_UPPER = (byte) 'E'; + private static final byte BYTE_EXPONENT_LOWER = (byte) 'e'; + private static final byte BYTE_EXPONENT_UPPER = (byte) 'E'; // // Initialize (fastSetFrom*). @@ -1758,7 +1749,7 @@ public class FastHiveDecimalImpl extends FastHiveDecimal { * 4,611,686,018,427,387,904 or * 461,1686018427387904 (16 digit comma'd) */ - private static FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_62 = + private static final FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_62 = new FastHiveDecimal(1, 1686018427387904L, 461L, 0, 19, 0); /* @@ -1769,7 +1760,7 @@ public class FastHiveDecimalImpl extends FastHiveDecimal { * 9,223,372,036,854,775,808 or * 922,3372036854775808 (16 digit comma'd) */ - private static FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_63 = + private static final FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_63 = new FastHiveDecimal(1, 3372036854775808L, 922L, 0, 19, 0); /* @@ -1784,7 +1775,7 @@ public class FastHiveDecimalImpl extends FastHiveDecimal { * 42,535,295,865,117,307,932,921,825,928,971,026,432 or * 425352,9586511730793292,1825928971026432 (16 digit comma'd) */ - private static FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_125 = + private static final FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_125 = new
[2/2] hive git commit: HIVE-16183: Fix potential thread safety issues with static variables (reviewed by rui, sergey, and peter)
HIVE-16183: Fix potential thread safety issues with static variables (reviewed by rui, sergey, and peter) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/71f4930d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/71f4930d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/71f4930d Branch: refs/heads/master Commit: 71f4930d95475e7e63b5acc55af3809aefcc71e0 Parents: 1f7e26f Author: Xuefu Zhang <xu...@uber.com> Authored: Thu Mar 16 19:20:41 2017 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Thu Mar 16 19:20:41 2017 -0700 -- .../org/apache/hive/beeline/BeeLineOpts.java| 2 +- .../apache/hive/beeline/HiveSchemaHelper.java | 12 ++-- .../org/apache/hive/beeline/HiveSchemaTool.java | 2 +- .../org/apache/hadoop/hive/cli/RCFileCat.java | 13 ++-- .../apache/hadoop/hive/cli/TestRCFileCat.java | 4 +- .../org/apache/hadoop/hive/common/LogUtils.java | 5 +- .../hadoop/hive/common/StatsSetupConst.java | 2 +- .../hive/metastore/TestMetastoreVersion.java| 4 +- .../hive/metastore/MetaStoreSchemaInfo.java | 16 ++--- .../hadoop/hive/ql/exec/ArchiveUtils.java | 7 +- .../hadoop/hive/ql/exec/FunctionRegistry.java | 2 +- .../apache/hadoop/hive/ql/exec/Utilities.java | 21 +++--- .../ql/exec/vector/VectorizationContext.java| 2 +- .../exec/vector/expressions/CuckooSetBytes.java | 4 +- .../fast/VectorMapJoinFastHashTable.java| 6 +- .../hadoop/hive/ql/history/HiveHistoryImpl.java | 5 +- .../apache/hadoop/hive/ql/index/HiveIndex.java | 4 +- .../hadoop/hive/ql/io/HiveFileFormatUtils.java | 2 +- .../hadoop/hive/ql/io/HiveInputFormat.java | 3 +- .../org/apache/hadoop/hive/ql/io/RCFile.java| 2 +- .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 6 +- .../ql/io/rcfile/stats/PartialScanTask.java | 6 +- .../hadoop/hive/ql/metadata/VirtualColumn.java | 2 +- .../hive/ql/optimizer/GenMapRedUtils.java | 6 +- .../ListBucketingPrunerUtils.java | 4 +- .../physical/GenMRSkewJoinProcessor.java| 10 +-- 
.../hive/ql/optimizer/physical/Vectorizer.java | 42 +-- .../ql/optimizer/physical/VectorizerReason.java | 2 +- .../hive/ql/parse/BaseSemanticAnalyzer.java | 8 +-- .../hive/ql/parse/DDLSemanticAnalyzer.java | 2 +- .../hadoop/hive/ql/parse/WindowingSpec.java | 2 +- .../hadoop/hive/ql/plan/AbstractVectorDesc.java | 4 +- .../apache/hadoop/hive/ql/plan/GroupByDesc.java | 2 +- .../apache/hadoop/hive/ql/plan/MapJoinDesc.java | 6 +- .../hadoop/hive/ql/plan/ReduceSinkDesc.java | 2 +- .../hive/ql/plan/VectorAppMasterEventDesc.java | 2 +- .../hadoop/hive/ql/plan/VectorFileSinkDesc.java | 2 +- .../hadoop/hive/ql/plan/VectorFilterDesc.java | 2 +- .../hadoop/hive/ql/plan/VectorGroupByDesc.java | 2 +- .../hadoop/hive/ql/plan/VectorLimitDesc.java| 2 +- .../hadoop/hive/ql/plan/VectorMapJoinDesc.java | 2 +- .../hadoop/hive/ql/plan/VectorMapJoinInfo.java | 2 +- .../hive/ql/plan/VectorPartitionDesc.java | 2 +- .../hive/ql/plan/VectorReduceSinkDesc.java | 2 +- .../hive/ql/plan/VectorReduceSinkInfo.java | 2 +- .../hadoop/hive/ql/plan/VectorSMBJoinDesc.java | 2 +- .../hadoop/hive/ql/plan/VectorSelectDesc.java | 2 +- .../ql/plan/VectorSparkHashTableSinkDesc.java | 2 +- .../VectorSparkPartitionPruningSinkDesc.java| 2 +- .../hive/ql/plan/VectorTableScanDesc.java | 2 +- .../hadoop/hive/ql/processors/HiveCommand.java | 2 +- .../udf/generic/GenericUDFInternalInterval.java | 4 +- .../mapjoin/fast/CheckFastRowHashMap.java | 17 ++--- .../mapjoin/fast/CommonFastHashTable.java | 4 +- .../apache/hadoop/hive/serde2/SerDeUtils.java | 2 +- .../hive/serde2/avro/AvroDeserializer.java | 2 +- .../lazy/fast/LazySimpleDeserializeRead.java| 4 +- .../hive/serde2/lazy/fast/StringToDouble.java | 4 +- .../hive/serde2/lazybinary/LazyBinaryUtils.java | 2 +- .../hive/serde2/typeinfo/TypeInfoUtils.java | 2 +- .../org/apache/hadoop/hive/io/HdfsUtils.java| 4 +- .../hive/io/HiveIOExceptionHandlerChain.java| 2 +- .../hive/io/HiveIOExceptionHandlerUtil.java | 4 +- .../apache/hadoop/hive/shims/ShimLoader.java| 2 +- 
.../hive/common/type/FastHiveDecimalImpl.java | 73 +--- .../hadoop/hive/common/type/RandomTypeUtil.java | 10 +-- .../hive/testutils/jdbc/HiveBurnInClient.java | 4 +- 67 files changed, 151 insertions(+), 243 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/71f4930d/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java -- diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java b/beeline/src/java/org/apache/hive/beeline/BeeL
hive git commit: HIVE-16196: UDFJson having thread-safety issues (reviewed by Chao)
Repository: hive Updated Branches: refs/heads/master 0e62d3dcb -> 87be4b31c HIVE-16196: UDFJson having thread-safety issues (reviewed by Chao) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/87be4b31 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/87be4b31 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/87be4b31 Branch: refs/heads/master Commit: 87be4b31ce5abbe03ee8461a437c901b5ee9ed05 Parents: 0e62d3d Author: Xuefu Zhang <xu...@uber.com> Authored: Thu Mar 16 13:27:53 2017 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Thu Mar 16 13:27:53 2017 -0700 -- .../org/apache/hadoop/hive/ql/udf/UDFJson.java | 67 1 file changed, 41 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/87be4b31/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java index 2c42fae..0c54754 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java @@ -59,20 +59,14 @@ import org.codehaus.jackson.type.JavaType; + " [,] : Union operator\n" + " [start:end:step] : array slice operator\n") public class UDFJson extends UDF { - private final Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-\\:\\s]+).*"); - private final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]"); - - private static final JsonFactory JSON_FACTORY = new JsonFactory(); - static { -// Allows for unescaped ASCII control characters in JSON values -JSON_FACTORY.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS); -// Enabled to accept quoting of all character backslash qooting mechanism -JSON_FACTORY.enable(Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER); - } - private static final ObjectMapper MAPPER = new ObjectMapper(JSON_FACTORY); + private static final Pattern patternKey = 
Pattern.compile("^([a-zA-Z0-9_\\-\\:\\s]+).*"); + private static final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]"); private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class); private static final JavaType LIST_TYPE = TypeFactory.fromClass(List.class); + private final JsonFactory jsonFactory = new JsonFactory(); + private final ObjectMapper objectMapper = new ObjectMapper(jsonFactory); + // An LRU cache using a linked hash map static class HashCache<K, V> extends LinkedHashMap<K, V> { @@ -93,16 +87,18 @@ public class UDFJson extends UDF { } - static Map<String, Object> extractObjectCache = new HashCache<String, Object>(); - static Map<String, String[]> pathExprCache = new HashCache<String, String[]>(); - static Map<String, ArrayList> indexListCache = + Map<String, Object> extractObjectCache = new HashCache<String, Object>(); + Map<String, String[]> pathExprCache = new HashCache<String, String[]>(); + Map<String, ArrayList> indexListCache = new HashCache<String, ArrayList>(); - static Map<String, String> mKeyGroup1Cache = new HashCache<String, String>(); - static Map<String, Boolean> mKeyMatchesCache = new HashCache<String, Boolean>(); - - Text result = new Text(); + Map<String, String> mKeyGroup1Cache = new HashCache<String, String>(); + Map<String, Boolean> mKeyMatchesCache = new HashCache<String, Boolean>(); public UDFJson() { +// Allows for unescaped ASCII control characters in JSON values +jsonFactory.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS); +// Enabled to accept quoting of all character backslash qooting mechanism +jsonFactory.enable(Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER); } /** @@ -125,13 +121,13 @@ public class UDFJson extends UDF { * @return json string or null when an error happens. 
*/ public Text evaluate(String jsonString, String pathString) { - if (jsonString == null || jsonString.isEmpty() || pathString == null || pathString.isEmpty() || pathString.charAt(0) != '$') { return null; } int pathExprStart = 1; +boolean unknownType = pathString.equals("$"); boolean isRootArray = false; if (pathString.length() > 1) { @@ -155,23 +151,41 @@ public class UDFJson extends UDF { // Cache extractObject Object extractObject = extractObjectCache.get(jsonString); if (extractObject == null) { - JavaType javaType = isRootArray ? LIST_TYPE : MAP_TYPE; - try { -extractObject = MAPPER.rea
hive git commit: HIVE-16156: FileSinkOperator should delete existing output target when renaming (Reviewed by Sergey)
Repository: hive Updated Branches: refs/heads/master 8dda898ba -> 76b65baa7 HIVE-16156: FileSinkOperator should delete existing output target when renaming (Reviewed by Sergey) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/76b65baa Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/76b65baa Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/76b65baa Branch: refs/heads/master Commit: 76b65baa7b5f5c0e5c1f99cf0621247f65fb0b00 Parents: 8dda898 Author: Xuefu Zhang <xu...@uber.com> Authored: Fri Mar 10 22:14:49 2017 -0800 Committer: Xuefu Zhang <xu...@uber.com> Committed: Fri Mar 10 22:14:49 2017 -0800 -- .../apache/hadoop/hive/ql/exec/FileSinkOperator.java| 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/76b65baa/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 3bbe92d..a9d03d0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -229,8 +229,18 @@ public class FileSinkOperator extends TerminalOperator implements } } if (needToRename && outPaths[idx] != null && !fs.rename(outPaths[idx], finalPaths[idx])) { -throw new HiveException("Unable to rename output from: " + +FileStatus fileStatus = FileUtils.getFileStatusOrNull(fs, finalPaths[idx]); +if (fileStatus != null) { + LOG.warn("Target path " + finalPaths[idx] + " with a size " + fileStatus.getLen() + " exists. 
Trying to delete it."); + if (!fs.delete(finalPaths[idx], true)) { +throw new HiveException("Unable to delete existing target output: " + finalPaths[idx]); + } +} + +if (!fs.rename(outPaths[idx], finalPaths[idx])) { + throw new HiveException("Unable to rename output from: " + outPaths[idx] + " to: " + finalPaths[idx]); +} } updateProgress(); } catch (IOException e) {
hive git commit: HIVE-15671: RPCServer.registerClient() erroneously uses server/client handshake timeout for connection timeout (reviewed by Jimmy)
Repository: hive Updated Branches: refs/heads/master 791066178 -> 401b14ac7 HIVE-15671: RPCServer.registerClient() erroneously uses server/client handshake timeout for connection timeout (reviewed by Jimmy) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/401b14ac Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/401b14ac Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/401b14ac Branch: refs/heads/master Commit: 401b14ac778ff58cbc5e76f08de002ea4edf3c57 Parents: 7910661 Author: Xuefu Zhang <xu...@uber.com> Authored: Mon Feb 13 11:08:53 2017 -0800 Committer: Xuefu Zhang <xu...@uber.com> Committed: Mon Feb 13 11:08:53 2017 -0800 -- .../src/main/java/org/apache/hive/spark/client/rpc/Rpc.java| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/401b14ac/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java -- diff --git a/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java b/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java index b2f133b..0489684 100644 --- a/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java +++ b/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java @@ -120,7 +120,7 @@ public class Rpc implements Closeable { } }; final ScheduledFuture timeoutFuture = eloop.schedule(timeoutTask, -rpcConf.getServerConnectTimeoutMs(), TimeUnit.MILLISECONDS); +connectTimeoutMs, TimeUnit.MILLISECONDS); // The channel listener instantiates the Rpc instance when the connection is established, // and initiates the SASL handshake.
hive git commit: HIVE-15683: Make what's done in HIVE-15580 for group by configurable (reviewed by Chao)
Repository: hive Updated Branches: refs/heads/master 19a6831b9 -> 6c901fb3e HIVE-15683: Make what's done in HIVE-15580 for group by configurable (reviewed by Chao) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6c901fb3 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6c901fb3 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6c901fb3 Branch: refs/heads/master Commit: 6c901fb3e681edb76e3251996b14dac4ae092ce5 Parents: 19a6831 Author: Xuefu Zhang <xu...@uber.com> Authored: Wed Feb 8 14:58:19 2017 -0800 Committer: Xuefu Zhang <xu...@uber.com> Committed: Wed Feb 8 14:58:19 2017 -0800 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 ++ .../hive/ql/exec/spark/GroupByShuffler.java | 11 +++-- .../hive/ql/exec/spark/HiveReduceFunction.java | 10 ++--- .../spark/HiveReduceFunctionResultList.java | 18 ++--- .../hadoop/hive/ql/exec/spark/ReduceTran.java | 8 ++-- .../hive/ql/exec/spark/RepartitionShuffler.java | 42 .../hive/ql/exec/spark/SortByShuffler.java | 2 +- .../hive/ql/exec/spark/SparkPlanGenerator.java | 6 ++- .../hive/ql/exec/spark/SparkShuffler.java | 4 +- .../clientpositive/lateral_view_explode2.q | 4 +- .../clientpositive/spark/union_remove_25.q.out | 2 +- 11 files changed, 85 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/6c901fb3/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index f3b01b2..e82758f 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3228,6 +3228,10 @@ public class HiveConf extends Configuration { SPARK_DYNAMIC_PARTITION_PRUNING_MAX_DATA_SIZE( "hive.spark.dynamic.partition.pruning.max.data.size", 100*1024*1024L, "Maximum total data size in dynamic pruning."), +SPARK_USE_GROUPBY_SHUFFLE( 
+"hive.spark.use.groupby.shuffle", true, +"Spark groupByKey transformation has better performance but uses unbounded memory." + +"Turn this off when there is a memory issue."), NWAYJOINREORDER("hive.reorder.nway.joins", true, "Runs reordering of tables within single n-way join (i.e.: picks streamtable)"), HIVE_MERGE_NWAY_JOINS("hive.merge.nway.joins", true, http://git-wip-us.apache.org/repos/asf/hive/blob/6c901fb3/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java index 8267515..9f9e3b2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java @@ -20,18 +20,17 @@ package org.apache.hadoop.hive.ql.exec.spark; import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.io.BytesWritable; -import org.apache.spark.HashPartitioner; import org.apache.spark.api.java.JavaPairRDD; -public class GroupByShuffler implements SparkShuffler { +public class GroupByShuffler implements SparkShuffler<Iterable> { @Override - public JavaPairRDD<HiveKey, BytesWritable> shuffle( + public JavaPairRDD<HiveKey, Iterable> shuffle( JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) { -if (numPartitions < 0) { - numPartitions = 1; +if (numPartitions > 0) { + return input.groupByKey(numPartitions); } -return input.repartitionAndSortWithinPartitions(new HashPartitioner(numPartitions)); +return input.groupByKey(); } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/6c901fb3/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java index 2b85872..2b6e2de 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java @@ -25,8 +25,8 @@ import org.apache.hadoop.io.BytesWritable; import scala.Tuple2; -public class HiveReduceFunction extends HivePairFlatMapFunction< - Iterator<Tuple2<HiveKey, BytesWritable>>, HiveKey
hive git commit: HIVE-15682: Eliminate per-row based dummy iterator creation (reviewed by Chao)
Repository: hive Updated Branches: refs/heads/master 3e01ef326 -> 561dbe3b9 HIVE-15682: Eliminate per-row based dummy iterator creation (reviewed by Chao) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/561dbe3b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/561dbe3b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/561dbe3b Branch: refs/heads/master Commit: 561dbe3b90bc5cd85a64e22ccd9e384bbf67a782 Parents: 3e01ef3 Author: Xuefu Zhang <xu...@uber.com> Authored: Tue Feb 7 13:48:55 2017 -0800 Committer: Xuefu Zhang <xu...@uber.com> Committed: Tue Feb 7 13:48:55 2017 -0800 -- .../ql/exec/spark/SparkReduceRecordHandler.java | 56 1 file changed, 35 insertions(+), 21 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/561dbe3b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java index 44f2e4d..8251900 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java @@ -211,30 +211,44 @@ public class SparkReduceRecordHandler extends SparkRecordHandler { } /** - * TODO: Instead of creating a dummy iterator per row, we can implement a private method that's - * similar to processRow(Object key, Iterator values) but processes one row at a time. Then, - * we just call that private method here. + * A reusable dummy iterator that has only one value. 
+ * */ - @Override - public void processRow(Object key, final Object value) throws IOException { -processRow(key, new Iterator() { - boolean done = false; - @Override - public boolean hasNext() { -return !done; - } + private static class DummyIterator implements Iterator { +private boolean done = false; +private Object value = null; - @Override - public Object next() { -done = true; -return value; - } +public void setValue(Object v) { + this.value = v; + done = false; +} - @Override - public void remove() { -throw new UnsupportedOperationException("Iterator.remove() is not implemented/supported"); - } -}); +@Override +public boolean hasNext() { + return !done; +} + +@Override +public Object next() { + done = true; + return value; +} + +@Override +public void remove() { + throw new UnsupportedOperationException("Iterator.remove() is not implemented/supported"); +} + } + + private DummyIterator dummyIterator = new DummyIterator(); + + /** + * Process one row using a dummy iterator. + */ + @Override + public void processRow(Object key, final Object value) throws IOException { +dummyIterator.setValue(value); +processRow(key, dummyIterator); } @Override
hive git commit: HIVE-15749: Add missing ASF headers (Peter via Xuefu)
Repository: hive Updated Branches: refs/heads/master 4a03fb1da -> 5c403e9fc HIVE-15749: Add missing ASF headers (Peter via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5c403e9f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5c403e9f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5c403e9f Branch: refs/heads/master Commit: 5c403e9fc0552559914079ca480eba8b856b7ee8 Parents: 4a03fb1 Author: Xuefu Zhang <xu...@uber.com> Authored: Wed Feb 1 13:51:59 2017 -0800 Committer: Xuefu Zhang <xu...@uber.com> Committed: Wed Feb 1 13:51:59 2017 -0800 -- .../hive/common/classification/RetrySemantics.java | 17 + .../hadoop/hive/druid/io/DruidRecordWriter.java| 17 + .../hive/jdbc/TestHivePreparedStatement.java | 17 + .../hive/llap/io/encoded/LineRrOffsetReader.java | 17 + .../hive/llap/io/encoded/PassThruOffsetReader.java | 17 + .../hadoop/hive/ql/parse/TestMergeStatement.java | 17 + .../apache/hadoop/hive/ql/plan/TestMapWork.java| 17 + 7 files changed, 119 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/5c403e9f/common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java -- diff --git a/common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java b/common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java index abad45e..5883b01 100644 --- a/common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java +++ b/common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.hive.common.classification; http://git-wip-us.apache.org/repos/asf/hive/blob/5c403e9f/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java -- diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java index 1601a9a..3323cc0 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.hadoop.hive.druid.io; import com.google.common.base.Function; http://git-wip-us.apache.org/repos/asf/hive/blob/5c403e9f/jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java -- diff --git a/jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java b/jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java index bc49aeb..2a68c91 100644 --- a/jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java +++ b/jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) und
hive git commit: HIVE-15580: Eliminate unbounded memory usage for orderBy and groupBy in Hive on Spark (reviewed by Chao Sun)
Repository: hive Updated Branches: refs/heads/master f968cf78a -> 811b3e39e HIVE-15580: Eliminate unbounded memory usage for orderBy and groupBy in Hive on Spark (reviewed by Chao Sun) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/811b3e39 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/811b3e39 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/811b3e39 Branch: refs/heads/master Commit: 811b3e39ed569232c4f138c1287109ef8ebce132 Parents: f968cf7 Author: Xuefu Zhang <xu...@uber.com> Authored: Fri Jan 20 12:56:49 2017 -0800 Committer: Xuefu Zhang <xu...@uber.com> Committed: Fri Jan 20 12:56:49 2017 -0800 -- .../hive/ql/exec/spark/GroupByShuffler.java | 10 +-- .../hive/ql/exec/spark/HiveReduceFunction.java | 4 +- .../spark/HiveReduceFunctionResultList.java | 8 +-- .../hadoop/hive/ql/exec/spark/ReduceTran.java | 4 +- .../hadoop/hive/ql/exec/spark/ShuffleTran.java | 6 +- .../hive/ql/exec/spark/SortByShuffler.java | 65 +--- .../hive/ql/exec/spark/SparkPlanGenerator.java | 7 --- .../ql/exec/spark/SparkReduceRecordHandler.java | 29 +++-- .../hive/ql/exec/spark/SparkShuffler.java | 2 +- .../queries/clientpositive/union_top_level.q| 8 +-- .../clientpositive/llap/union_top_level.q.out | 52 .../spark/lateral_view_explode2.q.out | 2 +- .../clientpositive/spark/union_remove_25.q.out | 2 +- .../clientpositive/spark/union_top_level.q.out | 62 +-- .../spark/vector_outer_join5.q.out | 40 ++-- 15 files changed, 124 insertions(+), 177 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/811b3e39/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java index e128dd2..8267515 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java @@ -20,21 +20,23 @@ package org.apache.hadoop.hive.ql.exec.spark; import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.io.BytesWritable; +import org.apache.spark.HashPartitioner; import org.apache.spark.api.java.JavaPairRDD; public class GroupByShuffler implements SparkShuffler { @Override - public JavaPairRDD<HiveKey, Iterable> shuffle( + public JavaPairRDD<HiveKey, BytesWritable> shuffle( JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) { -if (numPartitions > 0) { - return input.groupByKey(numPartitions); +if (numPartitions < 0) { + numPartitions = 1; } -return input.groupByKey(); +return input.repartitionAndSortWithinPartitions(new HashPartitioner(numPartitions)); } @Override public String getName() { return "GroupBy"; } + } http://git-wip-us.apache.org/repos/asf/hive/blob/811b3e39/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java index eeb4443..2b85872 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java @@ -26,7 +26,7 @@ import org.apache.hadoop.io.BytesWritable; import scala.Tuple2; public class HiveReduceFunction extends HivePairFlatMapFunction< - Iterator<Tuple2<HiveKey, Iterable>>, HiveKey, BytesWritable> { + Iterator<Tuple2<HiveKey, BytesWritable>>, HiveKey, BytesWritable> { private static final long serialVersionUID = 1L; @@ -37,7 +37,7 @@ public class HiveReduceFunction extends HivePairFlatMapFunction< @SuppressWarnings("unchecked") @Override public Iterator<Tuple2<HiveKey, BytesWritable>> - call(Iterator<Tuple2<HiveKey, Iterable>> it) throws Exception { + call(Iterator<Tuple2<HiveKey, BytesWritable>> it) throws Exception { initJobConf(); 
SparkReduceRecordHandler reducerRecordhandler = new SparkReduceRecordHandler(); http://git-wip-us.apache.org/repos/asf/hive/blob/811b3e39/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunctionResultList.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunctionResultList.java b/ql/src/java/org/ap
hive git commit: HIVE-15543: Don't try to get memory/cores to decide parallelism when Spark dynamic allocation is enabled (Reviewed by Rui)
Repository: hive Updated Branches: refs/heads/master 9e7d384f6 -> ccc9bf3ea HIVE-15543: Don't try to get memory/cores to decide parallelism when Spark dynamic allocation is enabled (Reviewed by Rui) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ccc9bf3e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ccc9bf3e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ccc9bf3e Branch: refs/heads/master Commit: ccc9bf3eaadadcbb3c93faa4a9ccc0e20c41dc28 Parents: 9e7d384 Author: Xuefu Zhang <xu...@uber.com> Authored: Thu Jan 5 10:56:02 2017 -0800 Committer: Xuefu Zhang <xu...@uber.com> Committed: Thu Jan 5 10:56:02 2017 -0800 -- .../spark/SetSparkReducerParallelism.java | 56 1 file changed, 33 insertions(+), 23 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/ccc9bf3e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java index ff4924d..7a5b71f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java @@ -53,6 +53,8 @@ public class SetSparkReducerParallelism implements NodeProcessor { private static final Logger LOG = LoggerFactory.getLogger(SetSparkReducerParallelism.class.getName()); + private static final String SPARK_DYNAMIC_ALLOCATION_ENABLED = "spark.dynamicAllocation.enabled"; + // Spark memory per task, and total number of cores private ObjectPair<Long, Integer> sparkMemoryAndCores; @@ -109,34 +111,12 @@ public class SetSparkReducerParallelism implements NodeProcessor { } } -if (sparkMemoryAndCores == null) { - SparkSessionManager sparkSessionManager = null; - SparkSession sparkSession = null; - try { -sparkSessionManager = 
SparkSessionManagerImpl.getInstance(); -sparkSession = SparkUtilities.getSparkSession( - context.getConf(), sparkSessionManager); -sparkMemoryAndCores = sparkSession.getMemoryAndCores(); - } catch (HiveException e) { -throw new SemanticException("Failed to get a spark session: " + e); - } catch (Exception e) { -LOG.warn("Failed to get spark memory/core info", e); - } finally { -if (sparkSession != null && sparkSessionManager != null) { - try { -sparkSessionManager.returnSession(sparkSession); - } catch (HiveException ex) { -LOG.error("Failed to return the session to SessionManager: " + ex, ex); - } -} - } -} - // Divide it by 2 so that we can have more reducers long bytesPerReducer = context.getConf().getLongVar(HiveConf.ConfVars.BYTESPERREDUCER) / 2; int numReducers = Utilities.estimateReducers(numberOfBytes, bytesPerReducer, maxReducers, false); +getSparkMemoryAndCores(context); if (sparkMemoryAndCores != null && sparkMemoryAndCores.getFirst() > 0 && sparkMemoryAndCores.getSecond() > 0) { // warn the user if bytes per reducer is much larger than memory per task @@ -184,4 +164,34 @@ public class SetSparkReducerParallelism implements NodeProcessor { return false; } + private void getSparkMemoryAndCores(OptimizeSparkProcContext context) throws SemanticException { +if (context.getConf().getBoolean(SPARK_DYNAMIC_ALLOCATION_ENABLED, false)) { + // If dynamic allocation is enabled, numbers for memory and cores are meaningless. So, we don't + // try to get it. 
+ sparkMemoryAndCores = null; + return; +} + +SparkSessionManager sparkSessionManager = null; +SparkSession sparkSession = null; +try { + sparkSessionManager = SparkSessionManagerImpl.getInstance(); + sparkSession = SparkUtilities.getSparkSession( + context.getConf(), sparkSessionManager); + sparkMemoryAndCores = sparkSession.getMemoryAndCores(); +} catch (HiveException e) { + throw new SemanticException("Failed to get a spark session: " + e); +} catch (Exception e) { + LOG.warn("Failed to get spark memory/core info", e); +} finally { + if (sparkSession != null && sparkSessionManager != null) { +try { + sparkSessionManager.returnSession(sp
hive git commit: HIVE-15528: Expose Spark job error in SparkTask (Zhihai via Xuefu)
Repository: hive Updated Branches: refs/heads/master 5d45974e9 -> c928ad3d3 HIVE-15528: Expose Spark job error in SparkTask (Zhihai via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c928ad3d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c928ad3d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c928ad3d Branch: refs/heads/master Commit: c928ad3d3f958d1e2e109b689fc5c6e9ee3e619b Parents: 5d45974 Author: Xuefu Zhang <xu...@uber.com> Authored: Tue Jan 3 10:39:39 2017 -0800 Committer: Xuefu Zhang <xu...@uber.com> Committed: Tue Jan 3 10:39:39 2017 -0800 -- .../org/apache/hadoop/hive/ql/exec/spark/SparkTask.java | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/c928ad3d/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java index f836065..87d80a3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java @@ -114,7 +114,7 @@ public class SparkTask extends Task { this.jobID = jobRef.getSparkJobStatus().getAppID(); rc = jobRef.monitorJob(); SparkJobStatus sparkJobStatus = jobRef.getSparkJobStatus(); - getSparkJobInfo(sparkJobStatus); + getSparkJobInfo(sparkJobStatus, rc); if (rc == 0) { sparkStatistics = sparkJobStatus.getSparkStatistics(); if (LOG.isInfoEnabled() && sparkStatistics != null) { @@ -139,6 +139,7 @@ public class SparkTask extends Task { // org.apache.commons.lang.StringUtils console.printError(msg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); LOG.error(msg, e); + setException(e); rc = 1; } finally { startTime = perfLogger.getEndTime(PerfLogger.SPARK_SUBMIT_TO_RUNNING); @@ -196,6 +197,7 @@ public class SparkTask extends Task { String mesg = "Job Commit 
failed with exception '" + Utilities.getNameMessage(e) + "'"; console.printError(mesg, "\n" + StringUtils.stringifyException(e)); +setException(e); } } return rc; @@ -330,7 +332,7 @@ public class SparkTask extends Task { return counters; } - private void getSparkJobInfo(SparkJobStatus sparkJobStatus) { + private void getSparkJobInfo(SparkJobStatus sparkJobStatus, int rc) { try { stageIds = new ArrayList(); int[] ids = sparkJobStatus.getStageIds(); @@ -355,6 +357,12 @@ public class SparkTask extends Task { succeededTaskCount = sumComplete; totalTaskCount = sumTotal; failedTaskCount = sumFailed; + if (rc != 0) { +Throwable error = sparkJobStatus.getError(); +if (error != null) { + setException(error); +} + } } catch (Exception e) { LOG.error("Failed to get Spark job information", e); }
hive git commit: HIVE-14617: NPE in UDF MapValues() if input is null (reviewed by Chao)
Repository: hive Updated Branches: refs/heads/master 9343fee5d -> 2f686d4c0 HIVE-14617: NPE in UDF MapValues() if input is null (reviewed by Chao) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2f686d4c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2f686d4c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2f686d4c Branch: refs/heads/master Commit: 2f686d4c0c20540079660de202c619e42ed5cd4f Parents: 9343fee Author: Xuefu Zhang <xu...@uber.com> Authored: Thu Aug 25 11:05:25 2016 -0700 Committer: Xuefu Zhang <xu...@uber.com> Committed: Thu Aug 25 11:05:25 2016 -0700 -- .../ql/udf/generic/GenericUDFMapValues.java | 6 ++- .../ql/udf/generic/TestGenericUDFMapValues.java | 56 2 files changed, 61 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/2f686d4c/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java index 096ceac..3bd5864 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf.generic; import java.util.ArrayList; +import java.util.Map; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; @@ -61,7 +62,10 @@ public class GenericUDFMapValues extends GenericUDF { public Object evaluate(DeferredObject[] arguments) throws HiveException { retArray.clear(); Object mapObj = arguments[0].get(); -retArray.addAll(mapOI.getMap(mapObj).values()); +Map map = mapOI.getMap(mapObj); +if (map != null) { + retArray.addAll(map.values()); +} return retArray; } 
http://git-wip-us.apache.org/repos/asf/hive/blob/2f686d4c/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMapValues.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMapValues.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMapValues.java new file mode 100644 index 000..44676ed --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMapValues.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import java.io.IOException; +import java.util.Map; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.junit.Assert; +import org.junit.Test; + +public class TestGenericUDFMapValues { + + @Test + public void testNullMap() throws HiveException, IOException { +ObjectInspector[] inputOIs = { +ObjectInspectorFactory.getStandardMapObjectInspector( +PrimitiveObjectInspectorFactory.writableStringObjectInspector, +PrimitiveObjectInspectorFactory.writableStringObjectInspector), +}; + +Map<String, String> input = null; +DeferredObject[] args = { +new DeferredJavaObject(input) +}; + + GenericUDFMapValues udf = new GenericUDFMapValues(); +StandardListObjectInspector oi = (StandardListObjectInspector) udf.initialize(inputOIs); +Object res = udf.evaluate(args); +
svn commit: r1733688 - /hive/cms/trunk/content/people.mdtext
Author: xuefu Date: Sat Mar 5 04:28:38 2016 New Revision: 1733688 URL: http://svn.apache.org/viewvc?rev=1733688=rev Log: Update Xuefu's information Modified: hive/cms/trunk/content/people.mdtext Modified: hive/cms/trunk/content/people.mdtext URL: http://svn.apache.org/viewvc/hive/cms/trunk/content/people.mdtext?rev=1733688=1733687=1733688=diff == --- hive/cms/trunk/content/people.mdtext (original) +++ hive/cms/trunk/content/people.mdtext Sat Mar 5 04:28:38 2016 @@ -214,9 +214,9 @@ tr:nth-child(2n+1) { -xuefuz +xuefu Xuefu Zhang -http://cloudera.com/;>Cloudera +
hive git commit: HIVE-13101: NullPointerException in HiveLexer.g (Sandeep via Xuefu)
Repository: hive Updated Branches: refs/heads/master bc4dcf376 -> e9b734852 HIVE-13101: NullPointerException in HiveLexer.g (Sandeep via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e9b73485 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e9b73485 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e9b73485 Branch: refs/heads/master Commit: e9b73485281730abf73b35d9029000edd42fa35c Parents: bc4dcf3 Author: Xuefu Zhang <xzh...@cloudera.com> Authored: Wed Feb 24 15:50:47 2016 -0800 Committer: Xuefu Zhang <xzh...@cloudera.com> Committed: Wed Feb 24 15:50:47 2016 -0800 -- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g | 3 +++ 1 file changed, 3 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/e9b73485/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 4c4470b..3f92d16 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -31,6 +31,9 @@ import org.apache.hadoop.hive.conf.HiveConf; } protected boolean allowQuotedId() { +if(hiveConf == null){ + return false; +} String supportedQIds = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT); return !"none".equals(supportedQIds); }
hive git commit: HIVE-13045: move guava dependency back to 14 after HIVE-12952 (Mohit via Xuefu)
Repository: hive Updated Branches: refs/heads/master cc8cec235 -> fd59191be HIVE-13045: move guava dependency back to 14 after HIVE-12952 (Mohit via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fd59191b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fd59191b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fd59191b Branch: refs/heads/master Commit: fd59191be047a980dec704a2a1e764fd22d22936 Parents: cc8cec2 Author: Xuefu Zhang <xzh...@cloudera.com> Authored: Fri Feb 19 15:02:40 2016 -0800 Committer: Xuefu Zhang <xzh...@cloudera.com> Committed: Fri Feb 19 15:02:40 2016 -0800 -- pom.xml| 2 +- .../org/apache/hive/service/cli/operation/OperationManager.java| 1 - 2 files changed, 1 insertion(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/fd59191b/pom.xml -- diff --git a/pom.xml b/pom.xml index af2e3d1..836e397 100644 --- a/pom.xml +++ b/pom.xml @@ -129,7 +129,7 @@ 1.4 10.10.2.0 3.1.0 -15.0 +14.0.1 2.4.4 2.6.0 ${basedir}/${hive.path.to.root}/testutils/hadoop http://git-wip-us.apache.org/repos/asf/hive/blob/fd59191b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java -- diff --git a/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java index 96c01de..1b8aca9 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java +++ b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java @@ -28,7 +28,6 @@ import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import com.google.common.collect.EvictingQueue; import org.apache.hadoop.hive.common.metrics.common.Metrics; import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
hive git commit: HIVE-12205: Unify metric collection for local and remote spark client. (Chinna via Chengxiang)
Repository: hive Updated Branches: refs/heads/master a6d9bf76e -> 9829f9985 HIVE-12205: Unify metric collection for local and remote spark client. (Chinna via Chengxiang) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9829f998 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9829f998 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9829f998 Branch: refs/heads/master Commit: 9829f9985c48742a070b0f09889d8d74d24b5553 Parents: a6d9bf7 Author: chengxiang <chengxi...@apache.com> Authored: Wed Feb 17 18:36:51 2016 +0800 Committer: Xuefu Zhang <xzh...@cloudera.com> Committed: Wed Feb 17 06:34:10 2016 -0800 -- .../spark/status/impl/LocalSparkJobStatus.java | 94 +++- .../spark/status/impl/RemoteSparkJobStatus.java | 35 +--- .../exec/spark/status/impl/SparkJobUtils.java | 56 .../hive/spark/client/MetricsCollection.java| 2 +- 4 files changed, 73 insertions(+), 114 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/9829f998/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/LocalSparkJobStatus.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/LocalSparkJobStatus.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/LocalSparkJobStatus.java index 3c15521..d4819d9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/LocalSparkJobStatus.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/LocalSparkJobStatus.java @@ -28,6 +28,8 @@ import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatistics; import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticsBuilder; import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobStatus; import org.apache.hadoop.hive.ql.exec.spark.status.SparkStageProgress; +import org.apache.hive.spark.client.MetricsCollection; +import org.apache.hive.spark.client.metrics.Metrics; import org.apache.hive.spark.counter.SparkCounters; import 
org.apache.spark.JobExecutionStatus; import org.apache.spark.SparkJobInfo; @@ -135,7 +137,18 @@ public class LocalSparkJobStatus implements SparkJobStatus { return null; } -Map<String, Long> flatJobMetric = combineJobLevelMetrics(jobMetric); +MetricsCollection metricsCollection = new MetricsCollection(); +Set stageIds = jobMetric.keySet(); +for (String stageId : stageIds) { + List taskMetrics = jobMetric.get(stageId); + for (TaskMetrics taskMetric : taskMetrics) { +Metrics metrics = new Metrics(taskMetric); +metricsCollection.addMetrics(jobId, Integer.parseInt(stageId), 0, metrics); + } +} +SparkJobUtils sparkJobUtils = new SparkJobUtils(); +Map<String, Long> flatJobMetric = sparkJobUtils.collectMetrics(metricsCollection +.getAllMetrics()); for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) { sparkStatisticsBuilder.add(jobIdentifier, entry.getKey(), Long.toString(entry.getValue())); } @@ -153,85 +166,6 @@ public class LocalSparkJobStatus implements SparkJobStatus { } } - private Map<String, Long> combineJobLevelMetrics(Map<String, List> jobMetric) { -Map<String, Long> results = Maps.newLinkedHashMap(); - -long executorDeserializeTime = 0; -long executorRunTime = 0; -long resultSize = 0; -long jvmGCTime = 0; -long resultSerializationTime = 0; -long memoryBytesSpilled = 0; -long diskBytesSpilled = 0; -long bytesRead = 0; -long remoteBlocksFetched = 0; -long localBlocksFetched = 0; -long fetchWaitTime = 0; -long remoteBytesRead = 0; -long shuffleBytesWritten = 0; -long shuffleWriteTime = 0; -boolean inputMetricExist = false; -boolean shuffleReadMetricExist = false; -boolean shuffleWriteMetricExist = false; - -for (List stageMetric : jobMetric.values()) { - if (stageMetric != null) { -for (TaskMetrics taskMetrics : stageMetric) { - if (taskMetrics != null) { -executorDeserializeTime += taskMetrics.executorDeserializeTime(); -executorRunTime += taskMetrics.executorRunTime(); -resultSize += taskMetrics.resultSize(); -jvmGCTime += taskMetrics.jvmGCTime(); 
-resultSerializationTime += taskMetrics.resultSerializationTime(); -memoryBytesSpilled += taskMetrics.memoryBytesSpilled(); -diskBytesSpilled += taskMetrics.diskBytesSpilled(); -if (!taskMetrics.inputMetrics().isEmpty()) { - inputMetricExist = true; - bytesRead += taskMetrics.inputMetrics().get().bytesRead(); -} -Option shuffle
hive git commit: HIVE-12888: TestSparkNegativeCliDriver does not run in Spark mode[Spark Branch] (Chengxiang via Xuefu)
Repository: hive Updated Branches: refs/heads/spark 8e0a10c82 -> e07826041 HIVE-12888: TestSparkNegativeCliDriver does not run in Spark mode[Spark Branch] (Chengxiang via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e0782604 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e0782604 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e0782604 Branch: refs/heads/spark Commit: e07826041e0326228ab4eeeaebe46625bbac3c99 Parents: 8e0a10c Author: Xuefu Zhang <xzh...@cloudera.com> Authored: Tue Jan 26 19:31:49 2016 -0800 Committer: Xuefu Zhang <xzh...@cloudera.com> Committed: Tue Jan 26 19:31:49 2016 -0800 -- ql/src/test/templates/TestNegativeCliDriver.vm | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/e0782604/ql/src/test/templates/TestNegativeCliDriver.vm -- diff --git a/ql/src/test/templates/TestNegativeCliDriver.vm b/ql/src/test/templates/TestNegativeCliDriver.vm index 5f8ee8e..2ea476f 100644 --- a/ql/src/test/templates/TestNegativeCliDriver.vm +++ b/ql/src/test/templates/TestNegativeCliDriver.vm @@ -41,13 +41,17 @@ public class $className extends TestCase { static { MiniClusterType miniMR = MiniClusterType.valueForString("$clusterMode"); +String hiveConfDir = "$hiveConfDir"; String initScript = "$initScript"; String cleanupScript = "$cleanupScript"; try { String hadoopVer = "$hadoopVersion"; - qt = new QTestUtil((HIVE_ROOT + "$resultsDir"), (HIVE_ROOT + "$logDir"), miniMR, hadoopVer, - initScript, cleanupScript); + if (!hiveConfDir.isEmpty()) { +hiveConfDir = HIVE_ROOT + hiveConfDir; + } + qt = new QTestUtil((HIVE_ROOT + "$resultsDir"), (HIVE_ROOT + "$logDir"), miniMR, + hiveConfDir, hadoopVer, initScript, cleanupScript); // do a one time initialization qt.cleanUp(); qt.createSources();
hive git commit: HIVE-12708: Hive on Spark doesn't work with Kerboresed HBase [Spark Branch] (reviewed by Szehon)
Repository: hive Updated Branches: refs/heads/spark 9af0b27bd -> a116e96b7 HIVE-12708: Hive on Spark doesn't work with Kerboresed HBase [Spark Branch] (reviewed by Szehon) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a116e96b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a116e96b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a116e96b Branch: refs/heads/spark Commit: a116e96b75998b5e8632c46678cd94c551fba78a Parents: 9af0b27 Author: Xuefu Zhang <xzh...@cloudera.com> Authored: Fri Dec 18 14:37:03 2015 -0800 Committer: Xuefu Zhang <xzh...@cloudera.com> Committed: Fri Dec 18 14:37:03 2015 -0800 -- .../hive/ql/exec/spark/HiveSparkClientFactory.java | 11 +++ 1 file changed, 11 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/a116e96b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java index ec0fdea..9b2dce3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java @@ -30,6 +30,7 @@ import org.apache.commons.compress.utils.CharsetNames; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.io.HiveKey; @@ -67,6 +68,7 @@ public class HiveSparkClientFactory { public static Map<String, String> initiateSparkConf(HiveConf hiveConf) { Map<String, String> sparkConf = new HashMap<String, String>(); +HBaseConfiguration.addHbaseResources(hiveConf); // set default spark configurations. 
sparkConf.put("spark.master", SPARK_DEFAULT_MASTER); @@ -139,7 +141,16 @@ public class HiveSparkClientFactory { if (value != null && !value.isEmpty()) { sparkConf.put("spark.hadoop." + propertyName, value); } + } else if (propertyName.startsWith("hbase")) { +// Add HBase related configuration to Spark because in security mode, Spark needs it +// to generate hbase delegation token for Spark. This is a temp solution to deal with +// Spark problem. +String value = hiveConf.get(propertyName); +sparkConf.put("spark.hadoop." + propertyName, value); +LOG.info(String.format( + "load HBase configuration (%s -> %s).", propertyName, value)); } + if (RpcConfiguration.HIVE_SPARK_RSC_CONFIGS.contains(propertyName)) { String value = RpcConfiguration.getValue(hiveConf, propertyName); sparkConf.put(propertyName, value);
hive git commit: HIVE-12538: After set spark related config, SparkSession never get reused (Nemon Lou via Xuefu)
Repository: hive Updated Branches: refs/heads/master 09b6f9a36 -> 305b8ce40 HIVE-12538: After set spark related config, SparkSession never get reused (Nemon Lou via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/305b8ce4 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/305b8ce4 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/305b8ce4 Branch: refs/heads/master Commit: 305b8ce4097a692a2ee718b1df384d98d1e6fc1a Parents: 09b6f9a Author: Xuefu Zhang <xzh...@cloudera.com> Authored: Wed Dec 16 08:31:27 2015 -0800 Committer: Xuefu Zhang <xzh...@cloudera.com> Committed: Wed Dec 16 08:31:27 2015 -0800 -- .../java/org/apache/hadoop/hive/conf/HiveConf.java| 4 +++- .../org/apache/hadoop/hive/conf/TestHiveConf.java | 14 ++ .../hadoop/hive/ql/exec/spark/SparkUtilities.java | 6 +- 3 files changed, 22 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/305b8ce4/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 243f281..b5aee00 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2836,7 +2836,9 @@ public class HiveConf extends Configuration { // When either name or value is null, the set method below will fail, // and throw IllegalArgumentException set(name, value); - isSparkConfigUpdated = isSparkRelatedConfig(name); + if (isSparkRelatedConfig(name)) { +isSparkConfigUpdated = true; + } } } http://git-wip-us.apache.org/repos/asf/hive/blob/305b8ce4/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java -- diff --git a/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java b/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java index 3b7a525..cd472c7 100644 --- 
a/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java +++ b/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java @@ -142,4 +142,18 @@ public class TestHiveConf { Assert.assertEquals("", conf2.get(HiveConf.ConfVars.METASTOREPWD.varname)); Assert.assertEquals("", conf2.get(HiveConf.ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PASSWORD.varname)); } + + @Test + public void testSparkConfigUpdate(){ +HiveConf conf = new HiveConf(); +Assert.assertFalse(conf.getSparkConfigUpdated()); + +conf.verifyAndSet("spark.master", "yarn-cluster"); +Assert.assertTrue(conf.getSparkConfigUpdated()); +conf.verifyAndSet("hive.execution.engine", "spark"); +Assert.assertTrue("Expected spark config updated.", conf.getSparkConfigUpdated()); + +conf.setSparkConfigUpdated(false); +Assert.assertFalse(conf.getSparkConfigUpdated()); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/305b8ce4/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java index 0268469..a61cdc5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java @@ -121,12 +121,16 @@ public class SparkUtilities { public static SparkSession getSparkSession(HiveConf conf, SparkSessionManager sparkSessionManager) throws HiveException { SparkSession sparkSession = SessionState.get().getSparkSession(); +HiveConf sessionConf = SessionState.get().getConf(); // Spark configurations are updated close the existing session -if (conf.getSparkConfigUpdated()) { +// In case of async queries or confOverlay is not empty, +// sessionConf and conf are different objects +if (sessionConf.getSparkConfigUpdated() || conf.getSparkConfigUpdated()) { sparkSessionManager.closeSession(sparkSession); sparkSession = null; conf.setSparkConfigUpdated(false); + 
sessionConf.setSparkConfigUpdated(false); } sparkSession = sparkSessionManager.getSession(sparkSession, conf, true); SessionState.get().setSparkSession(sparkSession);
hive git commit: HIVE-12568: Provide an option to specify network interface used by Spark remote client [Spark Branch] (reviewed by Jimmy)
Repository: hive Updated Branches: refs/heads/spark e4b8cf43c -> 9af0b27bd HIVE-12568: Provide an option to specify network interface used by Spark remote client [Spark Branch] (reviewed by Jimmy) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9af0b27b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9af0b27b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9af0b27b Branch: refs/heads/spark Commit: 9af0b27bda6352eb229058db57a25fe65eb81f9a Parents: e4b8cf4 Author: xzhang Authored: Mon Dec 7 11:10:25 2015 -0800 Committer: xzhang Committed: Mon Dec 7 11:10:25 2015 -0800 -- .../apache/hadoop/hive/common/ServerUtils.java | 19 +++ .../org/apache/hadoop/hive/conf/HiveConf.java | 5 ++ .../service/cli/thrift/ThriftCLIService.java| 15 +++--- .../hive/spark/client/rpc/RpcConfiguration.java | 57 +++- 4 files changed, 50 insertions(+), 46 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/9af0b27b/common/src/java/org/apache/hadoop/hive/common/ServerUtils.java -- diff --git a/common/src/java/org/apache/hadoop/hive/common/ServerUtils.java b/common/src/java/org/apache/hadoop/hive/common/ServerUtils.java index 83517ce..b44f92f 100644 --- a/common/src/java/org/apache/hadoop/hive/common/ServerUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/ServerUtils.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.common; +import java.net.InetAddress; +import java.net.UnknownHostException; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileSystem; @@ -47,4 +50,20 @@ public class ServerUtils { } } + /** + * Get the Inet address of the machine of the given host name. 
+ * @param hostname The name of the host + * @return The network address of the the host + * @throws UnknownHostException + */ + public static InetAddress getHostAddress(String hostname) throws UnknownHostException { +InetAddress serverIPAddress; +if (hostname != null && !hostname.isEmpty()) { + serverIPAddress = InetAddress.getByName(hostname); +} else { + serverIPAddress = InetAddress.getLocalHost(); +} +return serverIPAddress; + } + } http://git-wip-us.apache.org/repos/asf/hive/blob/9af0b27b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 9e805bd..53ef428 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2387,6 +2387,11 @@ public class HiveConf extends Configuration { "Channel logging level for remote Spark driver. One of {DEBUG, ERROR, INFO, TRACE, WARN}."), SPARK_RPC_SASL_MECHANISM("hive.spark.client.rpc.sasl.mechanisms", "DIGEST-MD5", "Name of the SASL mechanism to use for authentication."), +SPARK_RPC_SERVER_ADDRESS("hive.spark.client.rpc.server.address", "", + "The server address of HiverServer2 host to be used for communication between Hive client and remote Spark driver. " + + "Default is empty, which means the address will be determined in the same way as for hive.server2.thrift.bind.host." 
+ + "This is only necessary if the host has mutiple network addresses and if a different network address other than " + + "hive.server2.thrift.bind.host is to be used."), SPARK_DYNAMIC_PARTITION_PRUNING( "hive.spark.dynamic.partition.pruning", false, "When dynamic pruning is enabled, joins on partition keys will be processed by writing\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/9af0b27b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java -- diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java index 8434965..d54f12c 100644 --- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java +++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java @@ -35,6 +35,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.common.ServerUtils; import org.apache.hive.service.AbstractService; import org.apache.hive.service.ServiceException; import org.apache.hive.service.ServiceUtils; @@ -160,21 +161,19 @@ public abstract class ThriftCLIService
hive git commit: HIVE-12554: Fix Spark branch build after merge [Spark Branch] (Rui via Xuefu)
Repository: hive Updated Branches: refs/heads/spark 79035f1c5 -> 1a87bcc0f HIVE-12554: Fix Spark branch build after merge [Spark Branch] (Rui via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1a87bcc0 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1a87bcc0 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1a87bcc0 Branch: refs/heads/spark Commit: 1a87bcc0f27e5a819035ac67fd68ace4c41301e9 Parents: 79035f1 Author: Xuefu Zhang <xzh...@cloudera.com> Authored: Tue Dec 1 10:49:04 2015 -0800 Committer: Xuefu Zhang <xzh...@cloudera.com> Committed: Tue Dec 1 10:49:04 2015 -0800 -- .../apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java | 3 ++- ql/src/test/results/clientpositive/gen_udf_example_add10.q.out| 1 + .../test/results/clientpositive/spark/gen_udf_example_add10.q.out | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/1a87bcc0/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java index d215873..ec0fdea 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java @@ -27,6 +27,7 @@ import java.util.Properties; import java.util.Set; import org.apache.commons.compress.utils.CharsetNames; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; @@ -133,7 +134,7 @@ public class HiveSparkClientFactory { LOG.info(String.format( "load yarn property from hive configuration in %s mode (%s -> %s).", sparkMaster, propertyName, value)); - } else if (propertyName.equals(HiveConf.ConfVars.HADOOPFS.varname)) { + } else if 
(propertyName.equals(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY)) { String value = hiveConf.get(propertyName); if (value != null && !value.isEmpty()) { sparkConf.put("spark.hadoop." + propertyName, value); http://git-wip-us.apache.org/repos/asf/hive/blob/1a87bcc0/ql/src/test/results/clientpositive/gen_udf_example_add10.q.out -- diff --git a/ql/src/test/results/clientpositive/gen_udf_example_add10.q.out b/ql/src/test/results/clientpositive/gen_udf_example_add10.q.out index 984554d..cab2ec8 100644 --- a/ql/src/test/results/clientpositive/gen_udf_example_add10.q.out +++ b/ql/src/test/results/clientpositive/gen_udf_example_add10.q.out @@ -43,6 +43,7 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: double) sort order: -+ Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE +TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double) http://git-wip-us.apache.org/repos/asf/hive/blob/1a87bcc0/ql/src/test/results/clientpositive/spark/gen_udf_example_add10.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/gen_udf_example_add10.q.out b/ql/src/test/results/clientpositive/spark/gen_udf_example_add10.q.out index 05ec1f5..493d0a4 100644 --- a/ql/src/test/results/clientpositive/spark/gen_udf_example_add10.q.out +++ b/ql/src/test/results/clientpositive/spark/gen_udf_example_add10.q.out @@ -48,6 +48,7 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: double) sort order: -+ Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Reducer 2 Reduce Operator Tree: Select Operator
[3/3] hive git commit: HIVE-12184: DESCRIBE of fully qualified table fails when db and table name match and non-default database is in use (Naveen via Xuefu)
HIVE-12184: DESCRIBE of fully qualified table fails when db and table name match and non-default database is in use (Naveen via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e9ca6870 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e9ca6870 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e9ca6870 Branch: refs/heads/master Commit: e9ca6870df889e03e8fa6888d7fbb51c4fbaf20a Parents: 3a17d42 Author: Xuefu Zhang <xzh...@cloudera.com> Authored: Mon Nov 30 21:37:11 2015 -0800 Committer: Xuefu Zhang <xzh...@cloudera.com> Committed: Mon Nov 30 21:37:11 2015 -0800 -- .../hive/ql/parse/DDLSemanticAnalyzer.java | 226 --- .../apache/hadoop/hive/ql/parse/HiveParser.g| 23 +- .../test/queries/clientnegative/desc_failure4.q | 5 + .../queries/clientnegative/describe_xpath1.q| 2 +- .../queries/clientnegative/describe_xpath2.q| 2 +- .../queries/clientnegative/describe_xpath3.q| 2 +- .../queries/clientnegative/describe_xpath4.q| 2 +- .../alter_partition_update_status.q | 12 +- .../alter_table_invalidate_column_stats.q | 74 ++-- .../clientpositive/alter_table_update_status.q | 10 +- .../queries/clientpositive/analyze_tbl_part.q | 8 +- .../queries/clientpositive/colstats_all_nulls.q | 4 +- .../clientpositive/columnstats_part_coltype.q | 42 +- .../clientpositive/columnstats_partlvl.q| 12 +- .../clientpositive/columnstats_partlvl_dp.q | 20 +- .../queries/clientpositive/compustat_avro.q | 4 +- .../clientpositive/confirm_initial_tbl_stats.q | 22 +- .../queries/clientpositive/describe_syntax.q| 10 +- .../queries/clientpositive/describe_table.q | 64 ++- .../queries/clientpositive/describe_xpath.q | 12 +- .../extrapolate_part_stats_full.q | 2 +- .../extrapolate_part_stats_partial.q| 4 +- .../extrapolate_part_stats_partial_ndv.q| 44 +-- .../clientpositive/partition_coltype_literals.q | 4 +- .../queries/clientpositive/stats_only_null.q| 2 +- .../results/clientnegative/desc_failure3.q.out | 2 +- 
.../results/clientnegative/desc_failure4.q.out | 21 + .../clientnegative/describe_xpath1.q.out| 2 +- .../clientnegative/describe_xpath2.q.out| 2 +- .../clientnegative/describe_xpath3.q.out| 2 +- .../clientnegative/describe_xpath4.q.out| 2 +- .../clientnegative/drop_database_cascade.q.out | 2 +- .../alter_partition_update_status.q.out | 20 +- .../alter_table_invalidate_column_stats.q.out | 144 +++ .../alter_table_update_status.q.out | 20 +- .../results/clientpositive/ambiguitycheck.q.out | 4 +- .../clientpositive/analyze_tbl_part.q.out | 12 +- .../clientpositive/colstats_all_nulls.q.out | 8 +- .../columnstats_part_coltype.q.out | 84 ++-- .../clientpositive/columnstats_partlvl.q.out| 24 +- .../clientpositive/columnstats_partlvl_dp.q.out | 40 +- .../results/clientpositive/compustat_avro.q.out | 8 +- .../confirm_initial_tbl_stats.q.out | 44 +-- .../clientpositive/describe_syntax.q.out| 20 +- .../results/clientpositive/describe_table.q.out | 390 ++- .../results/clientpositive/describe_xpath.q.out | 24 +- .../extrapolate_part_stats_full.q.out | 4 +- .../extrapolate_part_stats_partial.q.out| 8 +- .../extrapolate_part_stats_partial_ndv.q.out| 88 ++--- .../clientpositive/llap/stats_only_null.q.out | 4 +- .../partition_coltype_literals.q.out| 8 +- .../clientpositive/spark/stats_only_null.q.out | 4 +- .../clientpositive/stats_only_null.q.out| 4 +- .../clientpositive/tez/stats_only_null.q.out| 4 +- 54 files changed, 999 insertions(+), 612 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/e9ca6870/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index eea2fcc..757542d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -1717,158 +1717,65 @@ public class DDLSemanticAnalyzer extends 
BaseSemanticAnalyzer { } } -// assume the first component of DOT delimited name is tableName -// get the attemptTableName -static public String getAttemptTableName(Hive db, String qualifiedName, boolean isColumn) -throws SemanticException { - // check whether the name starts with table - // DESCRIBE table -
[1/3] hive git commit: HIVE-12184: DESCRIBE of fully qualified table fails when db and table name match and non-default database is in use (Naveen via Xuefu)
Repository: hive Updated Branches: refs/heads/master 3a17d4230 -> e9ca6870d http://git-wip-us.apache.org/repos/asf/hive/blob/e9ca6870/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out -- diff --git a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out index 3ef6bc0..f0d8ff2 100644 --- a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out +++ b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out @@ -8,10 +8,10 @@ key string default value string default A masked pattern was here -PREHOOK: query: describe formatted src.key +PREHOOK: query: describe formatted src key PREHOOK: type: DESCTABLE PREHOOK: Input: default@src -POSTHOOK: query: describe formatted src.key +POSTHOOK: query: describe formatted src key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment @@ -27,10 +27,10 @@ key string default value string default A masked pattern was here -PREHOOK: query: describe formatted src1.value +PREHOOK: query: describe formatted src1 value PREHOOK: type: DESCTABLE PREHOOK: Input: default@src1 -POSTHOOK: query: describe formatted src1.value +POSTHOOK: query: describe formatted src1 value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment @@ -45,10 +45,10 @@ POSTHOOK: Input: default@src_json json string default A masked pattern was here -PREHOOK: query: describe formatted src_json.json +PREHOOK: query: describe formatted src_json json PREHOOK: type: DESCTABLE PREHOOK: Input: default@src_json -POSTHOOK: query: describe formatted src_json.json +POSTHOOK: query: describe formatted src_json json POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_json # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len 
num_trues num_falses comment @@ -64,10 +64,10 @@ key string default value string default A masked pattern was here -PREHOOK: query: describe formatted src_sequencefile.value +PREHOOK: query: describe formatted src_sequencefile value PREHOOK: type: DESCTABLE PREHOOK: Input: default@src_sequencefile -POSTHOOK: query: describe formatted src_sequencefile.value +POSTHOOK: query: describe formatted src_sequencefile value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_sequencefile # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment @@ -83,10 +83,10 @@ key int value string A masked pattern was here -PREHOOK: query: describe formatted srcbucket.value +PREHOOK: query: describe formatted srcbucket value PREHOOK: type: DESCTABLE PREHOOK: Input: default@srcbucket -POSTHOOK: query: describe formatted srcbucket.value +POSTHOOK: query: describe formatted srcbucket value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcbucket # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment @@ -102,10 +102,10 @@ key int value string A masked pattern was here -PREHOOK: query: describe formatted srcbucket2.value +PREHOOK: query: describe formatted srcbucket2 value PREHOOK: type: DESCTABLE PREHOOK: Input: default@srcbucket2 -POSTHOOK: query: describe formatted srcbucket2.value +POSTHOOK: query: describe formatted srcbucket2 value POSTHOOK: type: DESCTABLE POSTHOOK:
[2/3] hive git commit: HIVE-12184: DESCRIBE of fully qualified table fails when db and table name match and non-default database is in use (Naveen via Xuefu)
http://git-wip-us.apache.org/repos/asf/hive/blob/e9ca6870/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats.q.out -- diff --git a/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats.q.out b/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats.q.out index 6ae4f25..f3c10ee 100644 --- a/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats.q.out +++ b/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats.q.out @@ -199,55 +199,55 @@ POSTHOOK: Input: statsdb1@testpart1 POSTHOOK: Input: statsdb1@testpart1@part=part1 POSTHOOK: Input: statsdb1@testpart1@part=part2 A masked pattern was here -PREHOOK: query: describe formatted statsdb1.testpart1 col1 partition (part = 'part1') +PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 -POSTHOOK: query: describe formatted statsdb1.testpart1 col1 partition (part = 'part1') +POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment col1 int 27 484 0 8 from deserializer -PREHOOK: query: describe formatted statsdb1.testpart1 col2 partition (part = 'part1') +PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 -POSTHOOK: query: describe formatted statsdb1.testpart1 col2 partition (part = 'part1') +POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment col2 string 0 12 6.7 7 from deserializer -PREHOOK: query: describe formatted statsdb1.testpart1 col3 partition (part = 
'part1') +PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 -POSTHOOK: query: describe formatted statsdb1.testpart1 col3 partition (part = 'part1') +POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment col3 string 0 1 4.0 4 from deserializer -PREHOOK: query: describe formatted statsdb1.testpart1 col1 partition (part = 'part2') +PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 -POSTHOOK: query: describe formatted statsdb1.testpart1 col1 partition (part = 'part2') +POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment col1 int 27 484 0 18
hive git commit: HIVE-12512: Include driver logs in execution-level Operation logs (Mohit via Xuefu)
Repository: hive Updated Branches: refs/heads/master e9ca6870d -> be410d24f HIVE-12512: Include driver logs in execution-level Operation logs (Mohit via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/be410d24 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/be410d24 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/be410d24 Branch: refs/heads/master Commit: be410d24fe7e6598792b672d3fad950ed877a0b4 Parents: e9ca687 Author: Xuefu Zhang <xzh...@cloudera.com> Authored: Mon Nov 30 21:40:50 2015 -0800 Committer: Xuefu Zhang <xzh...@cloudera.com> Committed: Mon Nov 30 21:40:50 2015 -0800 -- .../service/cli/operation/TestOperationLoggingAPIWithMr.java | 7 --- .../service/cli/operation/TestOperationLoggingAPIWithTez.java | 6 +++--- .../apache/hive/service/cli/operation/LogDivertAppender.java | 3 ++- 3 files changed, 9 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/be410d24/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithMr.java -- diff --git a/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithMr.java b/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithMr.java index 0155b75..d21571e 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithMr.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithMr.java @@ -36,7 +36,7 @@ import org.junit.Test; * TestOperationLoggingAPIWithMr * Test the FetchResults of TFetchType.LOG in thrift level in MR mode. 
*/ -public class TestOperationLoggingAPIWithMr extends OperationLoggingAPITestBase{ +public class TestOperationLoggingAPIWithMr extends OperationLoggingAPITestBase { @BeforeClass public static void setUpBeforeClass() throws Exception { @@ -45,10 +45,11 @@ public class TestOperationLoggingAPIWithMr extends OperationLoggingAPITestBase{ "Parsing command", "Parse Completed", "Starting Semantic Analysis", - "Semantic Analysis Completed", - "Starting command" }; expectedLogsExecution = new String[]{ + "Total jobs", + "Starting command", + "Semantic Analysis Completed", "Number of reduce tasks determined at compile time", "number of splits", "Submitting tokens for job", http://git-wip-us.apache.org/repos/asf/hive/blob/be410d24/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java -- diff --git a/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java b/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java index ab29861..3ffc3a4 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java @@ -20,11 +20,11 @@ public class TestOperationLoggingAPIWithTez extends OperationLoggingAPITestBase expectedLogsVerbose = new String[]{ "Parsing command", "Parse Completed", - "Starting Semantic Analysis", - "Semantic Analysis Completed", - "Starting command" + "Starting Semantic Analysis" }; expectedLogsExecution = new String[]{ + "Starting command", + "Semantic Analysis Completed", "Executing on YARN cluster with App id", "Setting Tez DAG access" }; http://git-wip-us.apache.org/repos/asf/hive/blob/be410d24/service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java -- diff --git 
a/service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java b/service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java index 9cb6439..7531778 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java +++ b/service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java @@ -23,6 +23,7 @@ import java.io.OutputStreamWriter; import java.io.Serializable; import java.util.regex.Pattern; +import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.exec.Task; import org.
[83/91] [abbrv] hive git commit: HIVE-12307 - Streaming API TransactionBatch.close() must abort any remaining transactions in the batch(Eugene Koifman, reviewed by Alan Gates)
HIVE-12307 - Streaming API TransactionBatch.close() must abort any remaining transactions in the batch(Eugene Koifman, reviewed by Alan Gates) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f1ac5a39 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f1ac5a39 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f1ac5a39 Branch: refs/heads/spark Commit: f1ac5a391a18fccf724249038fca73e7b55854e2 Parents: 6d4dfa4 Author: Eugene KoifmanAuthored: Thu Nov 26 11:48:03 2015 -0800 Committer: Eugene Koifman Committed: Thu Nov 26 11:48:29 2015 -0800 -- .../streaming/AbstractRecordWriter.java | 32 ++- .../hcatalog/streaming/ConnectionError.java | 3 +- .../streaming/DelimitedInputWriter.java | 2 +- .../hive/hcatalog/streaming/HiveEndPoint.java | 211 +-- .../hcatalog/streaming/StrictJsonWriter.java| 2 +- .../hcatalog/streaming/TransactionBatch.java| 1 + .../hcatalog/streaming/TransactionError.java| 2 +- .../hive/hcatalog/streaming/TestStreaming.java | 167 +++ .../hadoop/hive/ql/io/orc/OrcRecordUpdater.java | 3 + 9 files changed, 344 insertions(+), 79 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/f1ac5a39/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java -- diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java index 5c15675..0c6b9ea 100644 --- a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java +++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java @@ -65,6 +65,8 @@ public abstract class AbstractRecordWriter implements RecordWriter { final AcidOutputFormat outf; private Object[] bucketFieldData; // Pre-allocated in constructor. Updated on each write. 
+ private Long curBatchMinTxnId; + private Long curBatchMaxTxnId; protected AbstractRecordWriter(HiveEndPoint endPoint, HiveConf conf) throws ConnectionError, StreamingException { @@ -98,6 +100,12 @@ public abstract class AbstractRecordWriter implements RecordWriter { } } + /** + * used to tag error msgs to provied some breadcrumbs + */ + String getWatermark() { +return partitionPath + " txnIds[" + curBatchMinTxnId + "," + curBatchMaxTxnId + "]"; + } // return the column numbers of the bucketed columns private List getBucketColIDs(List bucketCols, List cols) { ArrayList result = new ArrayList(bucketCols.size()); @@ -164,22 +172,32 @@ public abstract class AbstractRecordWriter implements RecordWriter { throws StreamingIOFailure, SerializationError { try { LOG.debug("Creating Record updater"); + curBatchMinTxnId = minTxnId; + curBatchMaxTxnId = maxTxnID; updaters = createRecordUpdaters(totalBuckets, minTxnId, maxTxnID); } catch (IOException e) { - LOG.error("Failed creating record updater", e); - throw new StreamingIOFailure("Unable to get new record Updater", e); + String errMsg = "Failed creating RecordUpdaterS for " + getWatermark(); + LOG.error(errMsg, e); + throw new StreamingIOFailure(errMsg, e); } } @Override public void closeBatch() throws StreamingIOFailure { -try { - for (RecordUpdater updater : updaters) { +boolean haveError = false; +for (RecordUpdater updater : updaters) { + try { +//try not to leave any files open updater.close(false); } - updaters.clear(); -} catch (IOException e) { - throw new StreamingIOFailure("Unable to close recordUpdater", e); + catch(Exception ex) { +haveError = true; +LOG.error("Unable to close " + updater + " due to: " + ex.getMessage(), ex); + } +} +updaters.clear(); +if(haveError) { + throw new StreamingIOFailure("Encountered errors while closing (see logs) " + getWatermark()); } } http://git-wip-us.apache.org/repos/asf/hive/blob/f1ac5a39/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/ConnectionError.java -- 
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/ConnectionError.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/ConnectionError.java index ffa51c9..03f6a44 100644 --- a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/ConnectionError.java +++
[08/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out index 2c14065..fa80956 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out @@ -134,10 +134,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE -Spark HashTable Sink Operator - keys: -0 key (type: int) -1 key (type: int) +Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator +keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -153,27 +157,31 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE -Map Join Operator - condition map: - Inner Join 0 to 1 - keys: -0 key (type: int) -1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: -0 Map 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator -expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) -outputColumnNames: _col0, _col1, _col2 +Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 _col0 (type: int) + 1 _col0 (type: int) +outputColumnNames: _col0, _col1, _col3 +input vertices: + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic 
stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false +Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator +compressed: false +Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +table: +input format: org.apache.hadoop.mapred.TextInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -211,10 +219,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE -Spark HashTable Sink Operator - keys: -0 _col0 (type: int) -1 key (type: int) +Select Operator + expressions: key (type: int) +
[25/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out b/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out index 8e7078f..f6323f2 100644 --- a/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out @@ -128,11 +128,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: s3 + alias: s1 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: s1 @@ -140,22 +144,26 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE -Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: -0 key (type: int) -1 key (type: int) - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Group By Operator -aggregations: count() -mode: hash -outputColumnNames: _col0 -Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - sort order: +Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 _col0 (type: int) + 1 _col0 (type: int) +Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE +Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - 
value expressions: _col0 (type: bigint) + Reduce Output Operator +sort order: +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 Execution mode: uber @@ -203,14 +211,32 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: -Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE) A masked pattern was here Vertices: Map 1 Map Operator Tree: TableScan + alias: vt1 + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator +predicate: key is not null (type: boolean) +Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +key expressions: _col0 (type: int) +sort order: + +Map-reduce partition columns: _col0 (type: int) +Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE +Execution mode:
[03/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out index 180787b..b1850b6 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out @@ -81,12 +81,16 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) +Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reduce Output Operator +key expressions: _col1 (type: string) +sort order: + +Map-reduce partition columns: _col1 (type: string) +Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE +value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 3 Map Operator Tree: TableScan @@ -95,12 +99,16 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 1 Data 
size: 0 Basic stats: PARTIAL Column stats: NONE -Reduce Output Operator - key expressions: p2_name (type: string) - sort order: + - Map-reduce partition columns: p2_name (type: string) +Select Operator + expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Reduce Output Operator +key expressions: _col1 (type: string) +sort order: + +Map-reduce partition columns: _col1 (type: string) +Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 4 Map Operator Tree: TableScan @@ -109,12 +117,16 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE -Reduce Output Operator - key expressions: p3_name (type: string) - sort order: + - Map-reduce partition columns: p3_name (type: string) +Select Operator + expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) +
[64/91] [abbrv] hive git commit: HIVE-12331 : Remove hive.enforce.bucketing & hive.enforce.sorting configs (Ashutosh Chauhan via Jason Dere)
http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out index eeb18b0..93a7ca4 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out @@ -139,6 +139,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: +Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) A masked pattern was here Vertices: Map 1 @@ -165,37 +167,14 @@ STAGE PLANS: expressions: _col0 (type: int), _col7 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 825 Data size: 8764 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false - GlobalTableId: 1 - A masked pattern was here - NumFilesPerFileSink: 1 - Static Partition Specification: ds=1/ +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 825 Data size: 8764 Basic stats: COMPLETE Column stats: NONE - A masked pattern was here - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: -SORTBUCKETCOLSPREFIX TRUE -bucket_count 16 -bucket_field_name key -columns key,value -columns.comments -columns.types int:string - A masked pattern was here -name default.test_table3 -partition_columns ds -partition_columns.types string -serialization.ddl struct test_table3 { i32 key, string value} -serialization.format 1 -serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - A masked pattern was here - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false 
Path -> Alias: A masked pattern was here Path -> Partition: @@ -249,6 +228,44 @@ STAGE PLANS: name: default.test_table1 Truncated Path -> Alias: /test_table1/ds=1 [a] +Reducer 2 +Needs Tagging: false +Reduce Operator Tree: + Select Operator +expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 825 Data size: 8764 Basic stats: COMPLETE Column stats: NONE +File Output Operator + compressed: false + GlobalTableId: 1 + A masked pattern was here + NumFilesPerFileSink: 16 + Static Partition Specification: ds=1/ + Statistics: Num rows: 825 Data size: 8764 Basic stats: COMPLETE Column stats: NONE + A masked pattern was here + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: +SORTBUCKETCOLSPREFIX TRUE +bucket_count 16 +bucket_field_name key +columns key,value +columns.comments +columns.types int:string + A masked pattern was here +name default.test_table3 +partition_columns ds +partition_columns.types string +serialization.ddl struct test_table3 { i32 key, string value} +
[50/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/auto_join26.q.out -- diff --git a/ql/src/test/results/clientpositive/auto_join26.q.out b/ql/src/test/results/clientpositive/auto_join26.q.out index 94ab76f..5f9531b 100644 --- a/ql/src/test/results/clientpositive/auto_join26.q.out +++ b/ql/src/test/results/clientpositive/auto_join26.q.out @@ -28,11 +28,11 @@ STAGE PLANS: Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: -$hdt$_0:$hdt$_1:x +$hdt$_0:$hdt$_0:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: -$hdt$_0:$hdt$_1:x +$hdt$_0:$hdt$_0:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -67,24 +67,20 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1 + outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator -expressions: _col1 (type: string) -outputColumnNames: _col0 + Group By Operator +aggregations: count(1) +keys: _col0 (type: string) +mode: hash +outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 +Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: _col0 (type: string) -sort order: + -Map-reduce partition columns: _col0 (type: string) -Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: 
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/auto_join32.q.out -- diff --git a/ql/src/test/results/clientpositive/auto_join32.q.out b/ql/src/test/results/clientpositive/auto_join32.q.out index 161ab6b..9b32047 100644 --- a/ql/src/test/results/clientpositive/auto_join32.q.out +++ b/ql/src/test/results/clientpositive/auto_join32.q.out @@ -35,21 +35,25 @@ STAGE PLANS: Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: -s +$hdt$_0:s Fetch Operator limit: -1 Alias -> Map Local Operator Tree: -s +$hdt$_0:s TableScan alias: s Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - HashTable Sink Operator -keys: - 0 name (type: string) - 1 name (type: string) + Select Operator +expressions: name (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +HashTable Sink Operator + keys: +0 _col0 (type: string) +1 _col0 (type: string) Stage: Stage-2 Map Reduce @@ -60,25 +64,29 @@ STAGE PLANS: Filter Operator predicate: name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 name (type: string) - 1 name (type: string) -outputColumnNames: _col0, _col8 + Select Operator +expressions: name (type: string), registration (type: string) +outputColumnNames: _col0,
[13/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out -- diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out index c0a8959..441338e 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out @@ -46,6 +46,9 @@ POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwr POSTHOOK: type: LOAD A masked pattern was here POSTHOOK: Output: default@smb_bucket_3 +Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[22][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-4:MAPRED' is a cross product PREHOOK: query: explain select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY @@ -68,123 +71,107 @@ STAGE PLANS: Filter Operator predicate: (key = 5) (type: boolean) Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: 5 (type: int) -sort order: + -Map-reduce partition columns: 5 (type: int) + Select Operator Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE TableScan alias: b Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key = 5) (type: boolean) + predicate: (5 = key) (type: boolean) Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator 
-key expressions: 5 (type: int) -sort order: + -Map-reduce partition columns: 5 (type: int) + Select Operator Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + sort order: + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: -0 key (type: int) -1 key (type: int) +0 +1 Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator -expressions: 5 (type: int) -outputColumnNames: _col0 -Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator +compressed: false +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 keys: -0 _col0 (type: int) -1 _col0 (type: int) -
[63/91] [abbrv] hive git commit: HIVE-9599 : remove derby, datanucleus and other not related to jdbc client classes from hive-jdbc-standalone.jar (Ashutosh Chauhan via Thejas Nair)
HIVE-9599 : remove derby, datanucleus and other not related to jdbc client classes from hive-jdbc-standalone.jar (Ashutosh Chauhan via Thejas Nair) Signed-off-by: Ashutosh ChauhanProject: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1b6600de Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1b6600de Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1b6600de Branch: refs/heads/spark Commit: 1b6600de0f908170061628a6b5ed4f072012cc96 Parents: b7281ce Author: Ashutosh Chauhan Authored: Wed Nov 18 15:00:30 2015 -0800 Committer: Ashutosh Chauhan Committed: Tue Nov 24 15:06:23 2015 -0800 -- jdbc/pom.xml | 52 1 file changed, 52 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/1b6600de/jdbc/pom.xml -- diff --git a/jdbc/pom.xml b/jdbc/pom.xml index ea961a4..f8d7dfd 100644 --- a/jdbc/pom.xml +++ b/jdbc/pom.xml @@ -189,6 +189,58 @@ org.antlr:* org.slf4j:slf4j-log4j12 log4j:* + antlr:* + aopalliance:* + asm:* + com.google.code.gson:* + com.google.inject:* + com.google.inject.extensions:* + com.jamesmurty.utils:* + com.jcraft:* + com.jolbox:* + commons-beanutils:* + commons-cli:* + commons-dbcp:* + commons-digester:* + commons-el:* + commons-httpclient:* + commons-io:* + commons-net:* + commons-pool:* + com.google.code.findbugs:* + com.google.protobuf:* + com.sun.xml.bind:* + com.thoughtworks.paranamer:* + com.twitter:* + io.netty:* + javax.activation:* + javax.inject:* + javax.jdo:* + javax.mail:* + javax.servlet:* + javax.servlet.jsp:* + javax.transaction:* + javax.xml.bind:* + javax.xml.stream:* + jline:* + joda-time:* + net.java.dev.jets3t:* + org.apache.commons:commons-math3 + org.apache.curator:* + org.apache.derby:* + org.apache.directory.api:* + org.apache.directory.server:* + org.apache.geronimo.specs:* + org.apache.zookeeper:* + org.codehaus.jackson:* + org.codehaus.jettison:* + org.datanucleus:* + org.fusesource.leveldbjni:* + org.htrace:* + org.mortbay.jetty:* + 
org.xerial.snappy:* + tomcat:* + xmlenc:*
[12/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out b/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out index d72b4f3..71fe68e 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out @@ -50,10 +50,14 @@ STAGE PLANS: Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -Spark HashTable Sink Operator - keys: -0 (_col0 + _col5) (type: double) -1 UDFToDouble(key) (type: double) +Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator +keys: + 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) + 1 UDFToDouble(_col0) (type: double) Local Work: Map Reduce Local Work @@ -71,24 +75,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) +Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) +Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Local Work: Map Reduce Local Work @@ -97,34 +109,30 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) -outputColumnNames: _col0, _col5 + 0 _col0 (type: string) + 1 _col0 (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 + _col5) is not null (type: boolean) + predicate: (UDFToDouble(_col0) + UDFToDouble(_col1)) is not null (type: boolean) Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 (_col0 + _col5) (type: double) - 1 UDFToDouble(key) (type: double) -outputColumnNames: _col0, _col5, _col10 + 0 (UDFToDouble(_col0)
[20/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/multiMapJoin2.q.out -- diff --git a/ql/src/test/results/clientpositive/multiMapJoin2.q.out b/ql/src/test/results/clientpositive/multiMapJoin2.q.out index 46b717f..dee81c2 100644 --- a/ql/src/test/results/clientpositive/multiMapJoin2.q.out +++ b/ql/src/test/results/clientpositive/multiMapJoin2.q.out @@ -2079,21 +2079,25 @@ STAGE PLANS: Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: -y +$hdt$_1:y Fetch Operator limit: -1 Alias -> Map Local Operator Tree: -y +$hdt$_1:y TableScan alias: y Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator -keys: - 0 key (type: string) - 1 key (type: string) + Select Operator +expressions: key (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE +HashTable Sink Operator + keys: +0 _col0 (type: string) +1 _col0 (type: string) Stage: Stage-2 Map Reduce @@ -2104,22 +2108,26 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 63 Data size: 635 Basic stats: COMPLETE Column stats: NONE - Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: string) - 1 key (type: string) -Statistics: Num rows: 69 Data size: 698 Basic stats: COMPLETE Column stats: NONE -Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -sort order: + Select Operator +expressions: key (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 63 Data size: 635 Basic stats: COMPLETE Column stats: NONE +Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: string) +1 _col0 (type: 
string) + Statistics: Num rows: 69 Data size: 698 Basic stats: COMPLETE Column stats: NONE + Group By Operator +aggregations: count() +mode: hash +outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE -value expressions: _col0 (type: bigint) +Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/multi_join_union.q.out -- diff --git a/ql/src/test/results/clientpositive/multi_join_union.q.out b/ql/src/test/results/clientpositive/multi_join_union.q.out index 466f34b..76c837f 100644 --- a/ql/src/test/results/clientpositive/multi_join_union.q.out +++ b/ql/src/test/results/clientpositive/multi_join_union.q.out @@ -53,36 +53,40 @@ src12 b ON (a.key = b.key) JOIN (SELECT * FROM (SELECT * FROM src13 UNION ALL SELECT * FROM src14)a )c ON c.value = b.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: -a +$hdt$_0:a Fetch Operator limit: -1 -c-subquery1:a-subquery1:src13 +$hdt$_2-subquery1:$hdt$_2-subquery1:src13 Fetch Operator limit: -1 -c-subquery2:a-subquery2:src14 +$hdt$_2-subquery2:$hdt$_2-subquery2:src14 Fetch Operator limit: -1
[47/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out -- diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out index 853f641..0c8aa21 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out @@ -103,35 +103,43 @@ STAGE PLANS: Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: int) - 1 key (type: int) -outputColumnNames: _col1 -Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + Select Operator +expressions: key (type: int), value (type: string) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE +Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: int) +1 _col0 (type: int) + outputColumnNames: _col1 + Reduce Output Operator +key expressions: _col1 (type: string) +sort order: + +Map-reduce partition columns: _col1 (type: string) TableScan alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: value (type: string) -sort order: + -Map-reduce partition columns: value (type: string) + Select Operator +expressions: value (type: string) +outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: string) + sort 
order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) -1 value (type: string) +1 _col0 (type: string) Group By Operator aggregations: count() mode: hash @@ -206,35 +214,43 @@ STAGE PLANS: Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: int) - 1 key (type: int) -outputColumnNames: _col1 -Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + Select Operator +expressions: key (type: int), value (type: string) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE +Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: int) +1 _col0 (type: int) + outputColumnNames: _col1 + Reduce Output Operator +key expressions: _col1 (type: string) +sort order: + +Map-reduce partition columns: _col1 (type: string) TableScan alias: d Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type:
[89/91] [abbrv] hive git commit: HIVE-12338: Add webui to HiveServer2 (Jimmy, reviewed by Mohit, Szehon, Lefty)
HIVE-12338: Add webui to HiveServer2 (Jimmy, reviewed by Mohit, Szehon, Lefty) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2c0c191c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2c0c191c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2c0c191c Branch: refs/heads/spark Commit: 2c0c191cdd6b2d1aebe4502e24cc2b3d041bf3ca Parents: a51e5d4 Author: Jimmy XiangAuthored: Thu Nov 19 08:10:29 2015 -0800 Committer: Jimmy Xiang Committed: Sun Nov 29 09:55:09 2015 -0800 -- common/pom.xml | 5 + .../org/apache/hadoop/hive/conf/HiveConf.java | 5 + .../hive/http/AdminAuthorizedServlet.java | 45 ++ .../java/org/apache/hive/http/ConfServlet.java | 101 + .../java/org/apache/hive/http/HttpServer.java | 316 ++ .../org/apache/hive/http/JMXJsonServlet.java| 412 +++ pom.xml | 1 + ql/pom.xml | 6 + service/pom.xml | 56 +++ .../hive/service/cli/operation/Operation.java | 2 +- .../service/cli/operation/OperationManager.java | 26 +- .../service/cli/operation/SQLOperation.java | 8 +- .../service/cli/session/HiveSessionBase.java| 4 + .../service/cli/session/HiveSessionImpl.java| 12 + .../service/cli/session/SessionManager.java | 16 +- .../apache/hive/service/server/HiveServer2.java | 47 +++ .../hive-webapps/hiveserver2/hiveserver2.jsp| 186 + .../hive-webapps/hiveserver2/index.html | 20 + .../static/css/bootstrap-theme.min.css | 10 + .../hive-webapps/static/css/bootstrap.min.css | 9 + .../resources/hive-webapps/static/css/hive.css | 24 ++ .../fonts/glyphicons-halflings-regular.eot | Bin 0 -> 14079 bytes .../fonts/glyphicons-halflings-regular.svg | 228 ++ .../fonts/glyphicons-halflings-regular.ttf | Bin 0 -> 29512 bytes .../fonts/glyphicons-halflings-regular.woff | Bin 0 -> 16448 bytes .../hive-webapps/static/hive_logo.jpeg | Bin 0 -> 5616 bytes spark-client/pom.xml| 6 + 27 files changed, 1529 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/2c0c191c/common/pom.xml -- diff --git 
a/common/pom.xml b/common/pom.xml index ee74282..72bb550 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -56,6 +56,11 @@ ${commons-lang.version} + org.eclipse.jetty.aggregate + jetty-all + ${jetty.version} + + joda-time joda-time ${joda.version} http://git-wip-us.apache.org/repos/asf/hive/blob/2c0c191c/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index db942b0..9e805bd 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1847,6 +1847,11 @@ public class HiveConf extends Configuration { HIVE_SERVER2_PARALLEL_COMPILATION("hive.driver.parallel.compilation", false, "Whether to\n" + "enable parallel compilation between sessions on HiveServer2. The default is false."), +// HiveServer2 WebUI +HIVE_SERVER2_WEBUI_BIND_HOST("hive.server2.webui.host", "0.0.0.0", "The host address the HiveServer2 WebUI will listen on"), +HIVE_SERVER2_WEBUI_PORT("hive.server2.webui.port", 10002, "The port the HiveServer2 WebUI will listen on"), +HIVE_SERVER2_WEBUI_MAX_THREADS("hive.server2.webui.max.threads", 50, "The max HiveServer2 WebUI threads"), + // Tez session settings HIVE_SERVER2_TEZ_DEFAULT_QUEUES("hive.server2.tez.default.queues", "", "A list of comma separated values corresponding to YARN queues of the same name.\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/2c0c191c/common/src/java/org/apache/hive/http/AdminAuthorizedServlet.java -- diff --git a/common/src/java/org/apache/hive/http/AdminAuthorizedServlet.java b/common/src/java/org/apache/hive/http/AdminAuthorizedServlet.java new file mode 100644 index 000..5d957c2 --- /dev/null +++ b/common/src/java/org/apache/hive/http/AdminAuthorizedServlet.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + *
[56/91] [abbrv] hive git commit: HIVE-12456: QueryId can't be stored in the configuration of the SessionState since multiple queries can run in a single session (Aihua Xu, reviewed by Mohit)
HIVE-12456: QueryId can't be stored in the configuration of the SessionState since multiple queries can run in a single session (Aihua Xu, reviewed by Mohit) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2604cf26 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2604cf26 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2604cf26 Branch: refs/heads/spark Commit: 2604cf26ae36c4211bf155e2032398cc7344f641 Parents: f90d798 Author: Aihua XuAuthored: Mon Nov 23 12:20:39 2015 -0500 Committer: Aihua Xu Committed: Mon Nov 23 12:20:39 2015 -0500 -- .../cli/operation/ExecuteStatementOperation.java | 15 +-- .../hive/service/cli/operation/Operation.java| 19 +++ .../hive/service/cli/operation/SQLOperation.java | 4 ++-- .../service/cli/session/HiveSessionImpl.java | 1 - 4 files changed, 18 insertions(+), 21 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/2604cf26/service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java -- diff --git a/service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java b/service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java index 3f2de10..b3d9b52 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java +++ b/service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java @@ -18,7 +18,6 @@ package org.apache.hive.service.cli.operation; import java.sql.SQLException; -import java.util.HashMap; import java.util.Map; import org.apache.hadoop.hive.ql.processors.CommandProcessor; @@ -29,13 +28,11 @@ import org.apache.hive.service.cli.session.HiveSession; public abstract class ExecuteStatementOperation extends Operation { protected String statement = null; - protected Map confOverlay = new HashMap (); public ExecuteStatementOperation(HiveSession parentSession, String statement, Map confOverlay, boolean runInBackground) 
{ -super(parentSession, OperationType.EXECUTE_STATEMENT, runInBackground); +super(parentSession, confOverlay, OperationType.EXECUTE_STATEMENT, runInBackground); this.statement = statement; -setConfOverlay(confOverlay); } public String getStatement() { @@ -57,14 +54,4 @@ public abstract class ExecuteStatementOperation extends Operation { } return new HiveCommandOperation(parentSession, statement, processor, confOverlay); } - - protected Map getConfOverlay() { -return confOverlay; - } - - protected void setConfOverlay(Map confOverlay) { -if (confOverlay != null) { - this.confOverlay = confOverlay; -} - } } http://git-wip-us.apache.org/repos/asf/hive/blob/2604cf26/service/src/java/org/apache/hive/service/cli/operation/Operation.java -- diff --git a/service/src/java/org/apache/hive/service/cli/operation/Operation.java b/service/src/java/org/apache/hive/service/cli/operation/Operation.java index d13415e..25cefc2 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/Operation.java +++ b/service/src/java/org/apache/hive/service/cli/operation/Operation.java @@ -21,11 +21,14 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.EnumSet; +import java.util.HashMap; +import java.util.Map; import java.util.Set; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import com.google.common.collect.Sets; + import org.apache.hadoop.hive.common.metrics.common.Metrics; import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; import org.apache.hadoop.hive.common.metrics.common.MetricsFactory; @@ -50,8 +53,8 @@ import org.apache.logging.log4j.ThreadContext; public abstract class Operation { // Constants of the key strings for the log4j ThreadContext. 
- private static final String QUERYID = "QueryId"; - private static final String SESSIONID = "SessionId"; + public static final String SESSIONID_LOG_KEY = "sessionId"; + public static final String QUERYID_LOG_KEY = "queryId"; protected final HiveSession parentSession; private OperationState state = OperationState.INITIALIZED; @@ -67,6 +70,7 @@ public abstract class Operation { protected volatile Future backgroundHandle; protected OperationLog operationLog; protected boolean isOperationLogEnabled; + protected Map confOverlay = new HashMap (); private long operationTimeout; private long lastAccessTime; @@ -75,7
[55/91] [abbrv] hive git commit: HIVE-12409 make sure SessionState.initTxnMgr() is thread safe (Eugene Koifman, reviewed by Jason Dere)
HIVE-12409 make sure SessionState.initTxnMgr() is thread safe (Eugene Koifman, reviewed by Jason Dere) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f90d798e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f90d798e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f90d798e Branch: refs/heads/spark Commit: f90d798e830d56745c8bc0cfee35741ed66aab90 Parents: 695d905 Author: Eugene Koifman Authored: Mon Nov 23 08:20:06 2015 -0800 Committer: Eugene Koifman Committed: Mon Nov 23 08:20:06 2015 -0800 -- ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/f90d798e/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index ff875df..5c69fb6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -400,7 +400,7 @@ public class SessionState { * @return transaction manager * @throws LockException */ - public HiveTxnManager initTxnMgr(HiveConf conf) throws LockException { + public synchronized HiveTxnManager initTxnMgr(HiveConf conf) throws LockException { if (txnMgr == null) { txnMgr = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf); }
[86/91] [abbrv] hive git commit: HIVE-12008: Hive queries failing when using count(*) on column in view (Yongzhi Chen, reviewed by Szehon ho)
HIVE-12008: Hive queries failing when using count(*) on column in view (Yongzhi Chen, reviewed by Szehon ho) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a51e5d4e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a51e5d4e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a51e5d4e Branch: refs/heads/spark Commit: a51e5d4e261d6eb334497f768059829e36a99fd3 Parents: 7984738 Author: Yongzhi ChenAuthored: Sun Nov 29 01:06:22 2015 -0500 Committer: Yongzhi Chen Committed: Sun Nov 29 01:06:22 2015 -0500 -- .../hadoop/hive/ql/exec/UnionOperator.java | 8 +- .../hive/ql/optimizer/ColumnPrunerProcCtx.java | 2 +- .../clientpositive/unionall_unbalancedppd.q | 57 ++- .../results/clientpositive/spark/union16.q.out | 16 +- .../results/clientpositive/spark/union2.q.out | 16 +- .../results/clientpositive/spark/union9.q.out | 16 +- .../clientpositive/spark/union_view.q.out | 24 -- .../results/clientpositive/tez/union2.q.out | 28 +- .../results/clientpositive/tez/union9.q.out | 40 +-- .../tez/vector_null_projection.q.out| 4 - .../test/results/clientpositive/union16.q.out | 354 --- ql/src/test/results/clientpositive/union2.q.out | 32 +- ql/src/test/results/clientpositive/union9.q.out | 46 ++- .../results/clientpositive/union_view.q.out | 24 -- .../clientpositive/unionall_unbalancedppd.q.out | 261 +++--- .../clientpositive/vector_null_projection.q.out | 4 - 16 files changed, 388 insertions(+), 544 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/a51e5d4e/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java index a49097c..ddb23ee 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java @@ -62,13 +62,16 @@ public class UnionOperator extends Operator implements 
Serializable { int parents = parentOperators.size(); parentObjInspectors = new StructObjectInspector[parents]; parentFields = new List[parents]; +int columns = 0; for (int p = 0; p < parents; p++) { parentObjInspectors[p] = (StructObjectInspector) inputObjInspectors[p]; parentFields[p] = parentObjInspectors[p].getAllStructFieldRefs(); + if (p == 0 || parentFields[p].size() < columns) { +columns = parentFields[p].size(); + } } // Get columnNames from the first parent -int columns = parentFields[0].size(); ArrayList columnNames = new ArrayList(columns); for (int c = 0; c < columns; c++) { columnNames.add(parentFields[0].get(c).getFieldName()); @@ -81,7 +84,8 @@ public class UnionOperator extends Operator implements Serializable { } for (int p = 0; p < parents; p++) { - assert (parentFields[p].size() == columns); + //When columns is 0, the union operator is empty. + assert (columns == 0 || parentFields[p].size() == columns); for (int c = 0; c < columns; c++) { if (!columnTypeResolvers[c].updateForUnionAll(parentFields[p].get(c) .getFieldObjectInspector())) { http://git-wip-us.apache.org/repos/asf/hive/blob/a51e5d4e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java index b18a034..7befd3b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java @@ -108,7 +108,7 @@ public class ColumnPrunerProcCtx implements NodeProcessorCtx { prunList = joinPrunedColLists.get(child).get((byte) tag); } else if (child instanceof UnionOperator) { List positions = unionPrunedColLists.get(child); -if (positions != null && positions.size() > 0) { +if (positions != null) { prunList = new ArrayList<>(); RowSchema oldRS = curOp.getSchema(); for (Integer pos : positions) { 
http://git-wip-us.apache.org/repos/asf/hive/blob/a51e5d4e/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q -- diff --git a/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q b/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q index 360ad11..a704860 100644 ---
[05/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out index 8d3f0d8..6c57ff2 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out @@ -2748,20 +2748,24 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: _col0 (type: string) -sort order: + -Map-reduce partition columns: _col0 (type: string) +Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator +aggregations: count(1) +keys: _col0 (type: string) +mode: final +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -tag: 0 -value expressions: _col1 (type: bigint) -auto parallelism: false +Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: A masked pattern was here Path -> Partition: @@ -2825,20 +2829,24 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final 
- outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: _col0 (type: string) -sort order: + -Map-reduce partition columns: _col0 (type: string) +Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator +aggregations: count(1) +keys: _col0 (type: string) +mode: final +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -tag: 1 -value expressions: _col1 (type: bigint) -auto parallelism: false +Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: A masked pattern was here Path -> Partition: @@ -3113,20 +3121,24 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -
[82/91] [abbrv] hive git commit: HIVE-12496 : Open ServerTransport After MetaStore Initialization (Nemon Lou via Ashutosh Chauhan)
HIVE-12496 : Open ServerTransport After MetaStore Initialization (Nemon Lou via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6d4dfa40 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6d4dfa40 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6d4dfa40 Branch: refs/heads/spark Commit: 6d4dfa40bc6a70c7b4d7de0241b1868937d66b00 Parents: a9d3b09 Author: Nemon Lou Authored: Thu Nov 26 11:44:34 2015 -0800 Committer: Ashutosh Chauhan Committed: Thu Nov 26 11:44:34 2015 -0800 -- .../java/org/apache/hadoop/hive/metastore/HiveMetaStore.java| 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/6d4dfa40/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java -- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index a835f6a..00602e1 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -5978,8 +5978,6 @@ public class HiveMetaStore extends ThriftHiveMetastore { boolean useCompactProtocol = conf.getBoolVar(ConfVars.METASTORE_USE_THRIFT_COMPACT_PROTOCOL); useSasl = conf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL); - TServerTransport serverTransport = tcpKeepAlive ? - new TServerSocketKeepAlive(port) : new TServerSocket(port); TProcessor processor; TTransportFactory transFactory; @@ -6027,6 +6025,9 @@ public class HiveMetaStore extends ThriftHiveMetastore { LOG.info("Starting DB backed MetaStore Server"); } } + + TServerTransport serverTransport = tcpKeepAlive ? +new TServerSocketKeepAlive(port) : new TServerSocket(port); TThreadPoolServer.Args args = new TThreadPoolServer.Args(serverTransport) .processor(processor)
[60/91] [abbrv] hive git commit: HIVE-12411: Remove counter based stats collection mechanism (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
HIVE-12411: Remove counter based stats collection mechanism (Pengcheng Xiong, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ab98ffc2 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ab98ffc2 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ab98ffc2 Branch: refs/heads/spark Commit: ab98ffc2688abbc75de13524ca46848e566354ef Parents: 306a640 Author: Pengcheng XiongAuthored: Tue Nov 24 17:09:40 2015 +0800 Committer: Pengcheng Xiong Committed: Tue Nov 24 17:09:40 2015 +0800 -- .../hadoop/hive/common/StatsSetupConst.java | 13 - .../org/apache/hadoop/hive/conf/HiveConf.java | 8 +- data/conf/llap/hive-site.xml| 2 +- data/conf/spark/standalone/hive-site.xml| 2 +- data/conf/spark/yarn-client/hive-site.xml | 2 +- data/conf/tez/hive-site.xml | 4 +- .../hive/ql/stats/CounterStatsAggregator.java | 82 .../ql/stats/CounterStatsAggregatorSpark.java | 58 --- .../ql/stats/CounterStatsAggregatorTez.java | 79 .../hive/ql/stats/CounterStatsPublisher.java| 66 --- .../hadoop/hive/ql/stats/StatsFactory.java | 11 - .../test/queries/clientpositive/index_bitmap3.q | 1 - .../queries/clientpositive/index_bitmap_auto.q | 1 - .../test/queries/clientpositive/stats_counter.q | 16 - .../clientpositive/stats_counter_partitioned.q | 45 -- .../clientpositive/llap/stats_counter.q.out | 102 .../llap/stats_counter_partitioned.q.out| 465 --- .../clientpositive/spark/stats_counter.q.out| 102 .../spark/stats_counter_partitioned.q.out | 465 --- .../results/clientpositive/stats_counter.q.out | 102 .../stats_counter_partitioned.q.out | 465 --- .../clientpositive/tez/metadataonly1.q.out | 72 +-- .../clientpositive/tez/optimize_nullscan.q.out | 90 ++-- .../clientpositive/tez/stats_counter.q.out | 102 .../tez/stats_counter_partitioned.q.out | 465 --- 25 files changed, 88 insertions(+), 2732 deletions(-) -- 
http://git-wip-us.apache.org/repos/asf/hive/blob/ab98ffc2/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java -- diff --git a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java index 0a44bde..2ff76ee 100644 --- a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -31,19 +31,6 @@ import java.util.Map; public class StatsSetupConst { public enum StatDB { -counter { - @Override - public String getPublisher(Configuration conf) { -return "org.apache.hadoop.hive.ql.stats.CounterStatsPublisher"; } - @Override - public String getAggregator(Configuration conf) { -if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) { - return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregatorTez"; -} else if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) { - return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregatorSpark"; -} -return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregator"; } -}, fs { @Override public String getPublisher(Configuration conf) { http://git-wip-us.apache.org/repos/asf/hive/blob/ab98ffc2/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index f48403b..fffedd9 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1354,10 +1354,10 @@ public class HiveConf extends Configuration { // Statistics HIVESTATSAUTOGATHER("hive.stats.autogather", true, "A flag to gather statistics automatically during the INSERT OVERWRITE command."), -HIVESTATSDBCLASS("hive.stats.dbclass", "fs", new PatternSet("counter", "custom", "fs"), +HIVESTATSDBCLASS("hive.stats.dbclass", "fs", new PatternSet("custom", 
"fs"), "The storage that stores temporary Hive statistics. In filesystem based statistics collection ('fs'), \n" + "each task writes statistics it has collected in a file on the filesystem, which will be aggregated \n" + -"after the job has finished. Supported values are fs (filesystem), counter, and custom as defined in
[07/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out index bcef03c..d8ade07 100644 --- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out @@ -113,10 +113,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE -Spark HashTable Sink Operator - keys: -0 key (type: int) -1 key (type: int) +Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator +keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -134,26 +138,30 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 42 Data size: 368 Basic stats: COMPLETE Column stats: NONE -Map Join Operator - condition map: - Inner Join 0 to 1 - keys: -0 key (type: int) -1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: -0 Map 1 - Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE - Select Operator -expressions: _col0 (type: int), concat(_col1, _col7) (type: string) -outputColumnNames: _col0, _col1 +Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Map Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 _col0 (type: int) + 1 _col0 (type: int) +outputColumnNames: _col0, _col1, _col4 +input vertices: + 0 Map 1 Statistics: Num rows: 46 Data size: 404 Basic 
stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) +Select Operator + expressions: _col0 (type: int), concat(_col1, _col4) (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator +key expressions: _col0 (type: int) +sort order: + +Map-reduce partition columns: _col0 (type: int) +Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE +value expressions: _col1 (type: string) Local Work: Map Reduce Local Work Reducer 3 @@ -285,10 +293,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE -Spark HashTable Sink Operator - keys: -0 key (type: int) -1 key (type: int) +Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator +
[26/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out index 36a032a..3b634be 100644 --- a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out @@ -788,38 +788,46 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE -Map Join Operator - condition map: - Inner Join 0 to 1 - keys: -0 key (type: string) -1 key (type: string) - input vertices: -1 Map 3 - Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE - Group By Operator -aggregations: count() -mode: hash -outputColumnNames: _col0 -Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - sort order: +Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Map Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 _col0 (type: string) + 1 _col0 (type: string) +input vertices: + 1 Map 3 +Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE +Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator +sort order: +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +value expressions: _col0 (type: bigint) Execution mode: llap Map 3 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: 
key is not null (type: boolean) Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) +Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE Execution mode: llap Reducer 2 Execution mode: uber @@ -900,39 +908,47 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE -Map Join Operator - condition map: - Inner Join 0 to 1 - keys: -0 key (type: string) -1 key (type: string) - input vertices: -1 Map 3 - Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator -
[78/91] [abbrv] hive git commit: HIVE-8396 : Hive CliDriver command splitting can be broken when comments are present (Elliot West, reviewed by Sergey Shelukhin)
HIVE-8396 : Hive CliDriver command splitting can be broken when comments are present (Elliot West, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0ae374a3 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0ae374a3 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0ae374a3 Branch: refs/heads/spark Commit: 0ae374a320d1cae523ba2b434800e97692507db8 Parents: 454c2ca Author: Sergey Shelukhin Authored: Wed Nov 25 15:13:27 2015 -0800 Committer: Sergey Shelukhin Committed: Wed Nov 25 15:13:27 2015 -0800 -- cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java | 3 +++ 1 file changed, 3 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/0ae374a3/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java -- diff --git a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java index e04f247..e77b7f1 100644 --- a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java +++ b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java @@ -770,6 +770,9 @@ public class CliDriver { if (!prefix.equals("")) { prefix += '\n'; } + if (line.trim().startsWith("--")) { +continue; + } if (line.trim().endsWith(";") && !line.trim().endsWith("\\;")) { line = prefix + line; ret = cli.processLine(line, true);
[39/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/correlationoptimizer5.q.out -- diff --git a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out index 7f2e19f..d33ca0f 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out @@ -125,29 +125,37 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: int) -sort order: + -Map-reduce partition columns: key (type: int) + Select Operator +expressions: key (type: int) +outputColumnNames: _col0 Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE TableScan alias: y Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 724 Data size: 2897 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: int) -sort order: + -Map-reduce partition columns: key (type: int) + Select Operator +expressions: key (type: int) +outputColumnNames: _col0 Statistics: Num rows: 724 Data size: 2897 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 724 Data size: 2897 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: -0 key (type: int) -1 key (type: int) +0 _col0 (type: int) +1 _col0 (type: int) 
outputColumnNames: _col0 Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -180,10 +188,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3 + outputColumnNames: _col0, _col2 Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: _col0 (type: int), _col3 (type: string) +expressions: _col0 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -217,34 +225,42 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: int) -sort order: + -Map-reduce partition columns: key (type: int) + Select Operator +expressions: key (type: int) +outputColumnNames: _col0 Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE TableScan alias: n Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: int) -sort order: + -Map-reduce partition columns: key (type: int) + Select Operator +expressions: key (type: int),
[19/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/ppd_join5.q.out -- diff --git a/ql/src/test/results/clientpositive/ppd_join5.q.out b/ql/src/test/results/clientpositive/ppd_join5.q.out index 0807559..1b46ed5 100644 --- a/ql/src/test/results/clientpositive/ppd_join5.q.out +++ b/ql/src/test/results/clientpositive/ppd_join5.q.out @@ -32,7 +32,7 @@ POSTHOOK: Lineage: t1.id1 SIMPLE [] POSTHOOK: Lineage: t1.id2 SIMPLE [] POSTHOOK: Lineage: t2.d SIMPLE [] POSTHOOK: Lineage: t2.id SIMPLE [] -Warning: Shuffle Join JOIN[10][tables = [a, b, c]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select a.*,b.d d1,c.d d2 from t1 a join t2 b on (a.id1 = b.id) @@ -58,31 +58,39 @@ STAGE PLANS: Filter Operator predicate: (id1 is not null and id2 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: id1 (type: string), id2 (type: string) -sort order: ++ -Map-reduce partition columns: id1 (type: string), id2 (type: string) + Select Operator +expressions: id1 (type: string), id2 (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE TableScan alias: b Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (id is not null and (d <= 1)) (type: boolean) + predicate: ((d <= 1) and id is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: id (type: 
string), id (type: string) -sort order: ++ -Map-reduce partition columns: id (type: string), id (type: string) + Select Operator +expressions: id (type: string), d (type: int) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE -value expressions: d (type: int) +Reduce Output Operator + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col0 (type: string) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: -0 id1 (type: string), id2 (type: string) -1 id (type: string), id (type: string) - outputColumnNames: _col0, _col1, _col6 +0 _col0 (type: string), _col1 (type: string) +1 _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -98,17 +106,21 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int) + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int) TableScan -alias: c +alias: b Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d <= 1) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -sort order: + Select Operator +expressions: d (type: int) +outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE -value expressions: d (type: int) +Reduce Output Operator + sort
[49/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out -- diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out index f1aadef..85a685b 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out @@ -157,23 +157,27 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: string) - 1 key (type: string) -Position of Big Table: 1 -BucketMapJoin: true -Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator -sort order: -tag: -1 -value expressions: _col0 (type: bigint) -auto parallelism: false + Select Operator +expressions: key (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE +Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: string) +1 _col0 (type: string) + Position of Big Table: 1 + BucketMapJoin: true + Group By Operator +aggregations: count() +mode: hash +outputColumnNames: _col0 +Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: A masked pattern was here Path -> Partition: @@ -274,8 +278,8 @@ STAGE PLANS: name: default.bucket_big name: default.bucket_big Truncated Path -> Alias: -/bucket_big/ds=2008-04-08 [b] -/bucket_big/ds=2008-04-09 [b] +/bucket_big/ds=2008-04-08 [$hdt$_1:b] +/bucket_big/ds=2008-04-09 [$hdt$_1:b] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -379,23 +383,27 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) 
Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: string) - 1 key (type: string) -Position of Big Table: 0 -BucketMapJoin: true -Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator -sort order: -tag: -1 -value expressions: _col0 (type: bigint) -auto parallelism: false + Select Operator +expressions: key (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE +Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: string) +1 _col0 (type: string) + Position of Big Table: 0 + BucketMapJoin: true + Group By Operator +aggregations: count() +mode: hash +outputColumnNames: _col0 +Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: A masked pattern was here Path -> Partition: @@ -496,8 +504,8 @@ STAGE PLANS: name: default.bucket_big name: default.bucket_big Truncated Path -> Alias: -/bucket_big/ds=2008-04-08 [a] -/bucket_big/ds=2008-04-09 [a] +/bucket_big/ds=2008-04-08 [$hdt$_0:a] +
[65/91] [abbrv] hive git commit: HIVE-12331 : Remove hive.enforce.bucketing & hive.enforce.sorting configs (Ashutosh Chauhan via Jason Dere)
http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out -- diff --git a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out deleted file mode 100644 index b1dfd7c..000 --- a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out +++ /dev/null @@ -1,553 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) -create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) -create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tst1 -PREHOOK: query: alter table tst1 clustered by (key) into 8 buckets -PREHOOK: type: ALTERTABLE_CLUSTER_SORT -PREHOOK: Input: default@tst1 -PREHOOK: Output: default@tst1 -POSTHOOK: query: alter table tst1 clustered by (key) into 8 buckets -POSTHOOK: type: ALTERTABLE_CLUSTER_SORT -POSTHOOK: Input: default@tst1 -POSTHOOK: Output: default@tst1 -PREHOOK: query: describe formatted tst1 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@tst1 -POSTHOOK: query: describe formatted tst1 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@tst1 -# col_name data_type comment - -keystring -value string - -# Partition Information -# col_name data_type comment - -ds string - -# Detailed Table Information -Database: default - A masked pattern was here -Retention: 0 - A masked pattern was here -Table Type:MANAGED_TABLE -Table Parameters: - A masked pattern was here - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed:No -Num Buckets: 8 -Bucket Columns:[key] -Sort Columns: [] -Storage Desc Params: - serialization.format1 -PREHOOK: query: insert overwrite table tst1 partition (ds='1') select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@tst1@ds=1 -POSTHOOK: query: insert overwrite table tst1 partition (ds='1') select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@tst1@ds=1 -POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: describe formatted tst1 partition (ds = '1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@tst1 -POSTHOOK: query: describe formatted tst1 partition (ds = '1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@tst1 -# col_name data_type comment - -keystring -value string - -# Partition Information -# col_name data_type comment - -ds string - -# Detailed Partition Information -Partition Value: [1] -Database: default -Table: tst1 - A masked pattern was here -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles1 - numRows 500 - rawDataSize 5312 - totalSize 5812 - A masked pattern was here - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat:
[24/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index e9192a3..10b4168 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -210,41 +210,49 @@ STAGE PLANS: alias: srcpart filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: ds (type: string) -sort order: + -Map-reduce partition columns: ds (type: string) + Select Operator +expressions: ds (type: string) +outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: llap Map 4 Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE Filter Operator -predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) +predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: ds (type: string) - sort order: + - Map-reduce partition columns: ds (type: string) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds 
(type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Group By Operator -keys: _col0 (type: string) -mode: hash + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE -Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds +Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Target column: ds - Target Vertex: Map 1 + Dynamic Partitioning Event Operator +Target Input: srcpart +Partition key expr: ds +Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE +Target column: ds +Target Vertex: Map 1 Execution mode: vectorized, llap Reducer 2 Execution mode: llap @@ -253,8 +261,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 ds (type: string) - 1 ds (type: string) + 0 _col0 (type: string) + 1 _col0 (type: string) Statistics: Num rows: 2200 Data size:
[14/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/skewjoinopt4.q.out -- diff --git a/ql/src/test/results/clientpositive/skewjoinopt4.q.out b/ql/src/test/results/clientpositive/skewjoinopt4.q.out index 1d2a5a4..28fb7df 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt4.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt4.q.out @@ -62,43 +62,47 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: string) -sort order: + -Map-reduce partition columns: key (type: string) + Select Operator +expressions: key (type: string), val (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE -value expressions: val (type: string) +Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: b Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: string) -sort order: + -Map-reduce partition columns: key (type: string) + Select Operator +expressions: key (type: string), val (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE -value expressions: val (type: string) +Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE 
+ value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: -0 key (type: string) -1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 +0 _col0 (type: string) +1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Select Operator -expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) -outputColumnNames: _col0, _col1, _col2, _col3 -Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator +compressed: false +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -133,40 +137,44 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: string) -sort order: + -Map-reduce partition columns: key (type: string) + Select Operator +expressions: key (type: string), val (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE -value expressions: val (type: string) +Reduce Output Operator +
[31/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out index 1f1bf3d..814c947 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out @@ -135,12 +135,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) +Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator +key expressions: _col0 (type: int) +sort order: + +Map-reduce partition columns: _col0 (type: int) +Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE +value expressions: _col1 (type: string) Execution mode: llap Map 2 Map Operator Tree: @@ -150,28 +154,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE -Map Join Operator - condition map: - Inner Join 0 to 1 - keys: -0 key (type: int) -1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: -0 Map 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator -expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) -outputColumnNames: _col0, _col1, _col2 +Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 
250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 _col0 (type: int) + 1 _col0 (type: int) +outputColumnNames: _col0, _col1, _col3 +input vertices: + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false +HybridGraceHashJoin: true +Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator +compressed: false +Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +table: +input format: org.apache.hadoop.mapred.TextInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap Stage: Stage-0 @@ -229,11 +237,15 @@ STAGE PLANS: Filter Operator predicate: key
[45/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out -- diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out index bbfa756..31a1b29 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out @@ -68,19 +68,23 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: int) - 1 key (type: int) -Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator -sort order: -value expressions: _col0 (type: bigint) + Select Operator +expressions: key (type: int) +outputColumnNames: _col0 +Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE +Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: int) +1 _col0 (type: int) + Group By Operator +aggregations: count() +mode: hash +outputColumnNames: _col0 +Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -144,23 +148,27 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: int) - 1 key (type: int) + Select Operator +expressions: key (type: int) outputColumnNames: _col0 -Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator -key expressions: _col0 (type: int) -sort order: + -Map-reduce partition columns: 
_col0 (type: int) -value expressions: _col1 (type: bigint) +Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE +Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: int) +1 _col0 (type: int) + outputColumnNames: _col0 + Group By Operator +aggregations: count() +keys: _col0 (type: int) +mode: hash +outputColumnNames: _col0, _col1 +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -241,29 +249,30 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: int) - 1 key (type: int) + Select Operator +expressions: key (type: int) outputColumnNames: _col0 -Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator -key expressions: _col0 (type: int) -sort order: + -Map-reduce partition
[38/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/cross_product_check_2.q.out -- diff --git a/ql/src/test/results/clientpositive/cross_product_check_2.q.out b/ql/src/test/results/clientpositive/cross_product_check_2.q.out index 6910b40..57d1498 100644 --- a/ql/src/test/results/clientpositive/cross_product_check_2.q.out +++ b/ql/src/test/results/clientpositive/cross_product_check_2.q.out @@ -93,7 +93,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-5:MAPRED' is a cross product +Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-5:MAPRED' is a cross product PREHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A PREHOOK: type: QUERY POSTHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A @@ -107,60 +107,68 @@ STAGE PLANS: Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: -a +$hdt$_0:d1 Fetch Operator limit: -1 -d1 +$hdt$_2:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: -a - TableScan -alias: a -Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -HashTable Sink Operator - keys: -0 -1 -d1 +$hdt$_0:d1 TableScan alias: d1 Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: key (type: string), value (type: string) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 5 Data size: 48 Basic stats: COMPLETE Column stats: NONE +HashTable Sink Operator + keys: +0 _col0 (type: string) +1 _col0 (type: string) +$hdt$_2:a + TableScan +alias: a +Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic 
stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 key (type: string) - 1 key (type: string) + 0 + 1 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan -alias: d2 +alias: d1 Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 48 Basic stats: COMPLETE Column stats: NONE - Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: string) - 1 key (type: string) -outputColumnNames: _col0, _col1, _col5, _col6 -Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: key (type: string), value (type: string) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 5 Data size: 48 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: -0 -1 - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator -expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) +0 _col0 (type: string) +1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Map Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 + 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
[68/91] [abbrv] hive git commit: HIVE-12329 :Turn on limit pushdown optimization by default (Ashutosh Chauhan via Prasanth J)
http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out -- diff --git a/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out b/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out index 12920d2..bcbdf06 100644 --- a/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out +++ b/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out @@ -161,6 +161,7 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) Reducer 3 Execution mode: vectorized http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/tez/vector_char_2.q.out -- diff --git a/ql/src/test/results/clientpositive/tez/vector_char_2.q.out b/ql/src/test/results/clientpositive/tez/vector_char_2.q.out index 8545608..f88ee91 100644 --- a/ql/src/test/results/clientpositive/tez/vector_char_2.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_char_2.q.out @@ -106,6 +106,7 @@ STAGE PLANS: key expressions: _col0 (type: char(20)) sort order: + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: vectorized @@ -234,6 +235,7 @@ STAGE PLANS: key expressions: _col0 (type: char(20)) sort order: - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: vectorized http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/tez/vector_char_simple.q.out -- diff --git 
a/ql/src/test/results/clientpositive/tez/vector_char_simple.q.out b/ql/src/test/results/clientpositive/tez/vector_char_simple.q.out index be38775..617620c 100644 --- a/ql/src/test/results/clientpositive/tez/vector_char_simple.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_char_simple.q.out @@ -79,6 +79,7 @@ STAGE PLANS: key expressions: _col0 (type: char(10)) sort order: + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: char(20)) Execution mode: vectorized Reducer 2 @@ -179,6 +180,7 @@ STAGE PLANS: key expressions: _col0 (type: char(10)) sort order: - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: char(20)) Execution mode: vectorized Reducer 2 @@ -282,6 +284,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE +TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) Execution mode: vectorized Reducer 2 http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out -- diff --git a/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out b/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out index c492113..1142485 100644 --- a/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out @@ -41,6 +41,7 @@ STAGE PLANS: key expressions: null (type: double), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: smallint), _col5 (type: string) sort order: ++ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE +TopN Hash Memory Usage: 0.1 Execution mode:
[84/91] [abbrv] hive git commit: HIVE-12465: Hive might produce wrong results when (outer) joins are merged (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
HIVE-12465: Hive might produce wrong results when (outer) joins are merged (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/79847387 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/79847387 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/79847387 Branch: refs/heads/spark Commit: 79847387699b803506ecd8b03ecc8790ee229751 Parents: f1ac5a3 Author: Jesus Camacho RodriguezAuthored: Tue Nov 24 17:20:05 2015 +0100 Committer: Jesus Camacho Rodriguez Committed: Fri Nov 27 10:10:46 2015 +0100 -- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +- ql/src/test/queries/clientpositive/mergejoin.q | 12 + .../test/results/clientpositive/mergejoin.q.out | 548 +++ .../results/clientpositive/tez/mergejoin.q.out | 548 +++ 4 files changed, 1109 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/79847387/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 1b7873d..0ff6001 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -2265,7 +2265,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { if (rightCondAl1.size() != 0) { QBJoinTree leftTree = joinTree.getJoinSrc(); List leftTreeLeftSrc = new ArrayList(); -if (leftTree != null) { +if (leftTree != null && leftTree.getNoOuterJoin()) { String leftTreeRightSource = leftTree.getRightAliases() != null && leftTree.getRightAliases().length > 0 ? 
leftTree.getRightAliases()[0] : null; http://git-wip-us.apache.org/repos/asf/hive/blob/79847387/ql/src/test/queries/clientpositive/mergejoin.q -- diff --git a/ql/src/test/queries/clientpositive/mergejoin.q b/ql/src/test/queries/clientpositive/mergejoin.q index 6cd3929..82e1c93 100644 --- a/ql/src/test/queries/clientpositive/mergejoin.q +++ b/ql/src/test/queries/clientpositive/mergejoin.q @@ -132,3 +132,15 @@ select * from (select * from tab where tab.key = 0)a join (select * from tab_part where tab_part.key = 98)b on a.key = b.key full outer join tab_part c on b.key = c.key; + +set hive.cbo.enable = false; + +select * from +(select * from tab where tab.key = 0)a +full outer join +(select * from tab_part where tab_part.key = 98)b join tab_part c on a.key = b.key and b.key = c.key; + +select * from +(select * from tab where tab.key = 0)a +join +(select * from tab_part where tab_part.key = 98)b full outer join tab_part c on a.key = b.key and b.key = c.key; http://git-wip-us.apache.org/repos/asf/hive/blob/79847387/ql/src/test/results/clientpositive/mergejoin.q.out -- diff --git a/ql/src/test/results/clientpositive/mergejoin.q.out b/ql/src/test/results/clientpositive/mergejoin.q.out index 65f5ef5..e4a9e5b 100644 --- a/ql/src/test/results/clientpositive/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/mergejoin.q.out @@ -3787,3 +3787,551 @@ NULLNULLNULLNULLNULLNULL97 val_97 2008-04-08 NULL NULLNULLNULLNULLNULL97 val_97 2008-04-08 NULL NULLNULLNULLNULLNULL98 val_98 2008-04-08 NULL NULLNULLNULLNULLNULL98 val_98 2008-04-08 +Warning: Shuffle Join JOIN[9][tables = [a, b]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: select * from +(select * from tab where tab.key = 0)a +full outer join +(select * from tab_part where tab_part.key = 98)b join tab_part c on a.key = b.key and b.key = c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tab +PREHOOK: Input: default@tab@ds=2008-04-08 +PREHOOK: Input: default@tab_part +PREHOOK: Input: 
default@tab_part@ds=2008-04-08 + A masked pattern was here +POSTHOOK: query: select * from +(select * from tab where tab.key = 0)a +full outer join +(select * from tab_part where tab_part.key = 98)b join tab_part c on a.key = b.key and b.key = c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab +POSTHOOK: Input: default@tab@ds=2008-04-08 +POSTHOOK: Input: default@tab_part +POSTHOOK: Input: default@tab_part@ds=2008-04-08 + A masked pattern was here +Warning: Shuffle Join JOIN[9][tables = [a, b]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: select *
[62/91] [abbrv] hive git commit: HIVE-12175: Upgrade Kryo version to 3.0.x (Prasanth Jayachandran reviewed by Ashutosh Chauhan)
HIVE-12175: Upgrade Kryo version to 3.0.x (Prasanth Jayachandran reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b7281ce6 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b7281ce6 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b7281ce6 Branch: refs/heads/spark Commit: b7281ce6a61bcfbd398fd691cddc38c4f1a61f64 Parents: c6a835c Author: Prasanth JayachandranAuthored: Tue Nov 24 12:43:46 2015 -0600 Committer: Prasanth Jayachandran Committed: Tue Nov 24 12:43:46 2015 -0600 -- itests/qtest-accumulo/pom.xml | 2 +- pom.xml | 6 +- ql/pom.xml | 36 +++-- .../apache/hadoop/hive/ql/exec/Utilities.java | 145 +-- .../org/apache/hadoop/hive/ql/plan/MapWork.java | 15 -- .../apache/hadoop/hive/ql/plan/ReduceWork.java | 5 - spark-client/pom.xml| 28 ++-- .../hive/spark/client/rpc/KryoMessageCodec.java | 11 +- 8 files changed, 185 insertions(+), 63 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/b7281ce6/itests/qtest-accumulo/pom.xml -- diff --git a/itests/qtest-accumulo/pom.xml b/itests/qtest-accumulo/pom.xml index 7403a15..f7325dc 100644 --- a/itests/qtest-accumulo/pom.xml +++ b/itests/qtest-accumulo/pom.xml @@ -123,7 +123,7 @@ - com.esotericsoftware.kryo + com.esotericsoftware kryo ${kryo.version} test http://git-wip-us.apache.org/repos/asf/hive/blob/b7281ce6/pom.xml -- diff --git a/pom.xml b/pom.xml index c6df4a5..c38c10f 100644 --- a/pom.xml +++ b/pom.xml @@ -144,7 +144,7 @@ 3.5.2 20090211 4.11 -2.22 +3.0.3 0.9.3 0.9.3 2.4 @@ -228,8 +228,8 @@ -com.esotericsoftware.kryo -kryo +com.esotericsoftware +kryo-shaded ${kryo.version} http://git-wip-us.apache.org/repos/asf/hive/blob/b7281ce6/ql/pom.xml -- diff --git a/ql/pom.xml b/ql/pom.xml index 9420a62..d893099 100644 --- a/ql/pom.xml +++ b/ql/pom.xml @@ -72,8 +72,8 @@ - com.esotericsoftware.kryo - kryo + com.esotericsoftware + kryo-shaded ${kryo.version} @@ -594,16 +594,20 @@ spark-core_${scala.binary.version} 
${spark.version} true - - -org.slf4j -slf4j-log4j12 - - -commmons-logging -commons-logging - - + + + com.esotericsoftware.kryo + kryo + + + org.slf4j + slf4j-log4j12 + + + commmons-logging + commons-logging + + com.sun.jersey @@ -746,7 +750,9 @@ org.apache.hive:hive-serde org.apache.hive:hive-llap-client org.apache.hive:hive-metastore - com.esotericsoftware.kryo:kryo + com.esotericsoftware:kryo-shaded + com.esotericsoftware:minlog + org.objenesis:objenesis org.apache.parquet:parquet-hadoop-bundle org.apache.thrift:libthrift org.apache.thrift:libfb303 @@ -779,6 +785,10 @@ com.esotericsoftware org.apache.hive.com.esotericsoftware + + org.objenesis + org.apache.hive.org.objenesis + http://git-wip-us.apache.org/repos/asf/hive/blob/b7281ce6/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 9dbb45a..8b8cf6d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -37,6 +37,8 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.Serializable; import java.io.UnsupportedEncodingException; +import java.lang.reflect.Array; +import java.lang.reflect.Field; import java.net.URI; import java.net.URL; import java.net.URLClassLoader; @@ -87,8 +89,6 @@ import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.WordUtils; import org.apache.commons.lang3.StringEscapeUtils; import
[30/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out b/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out index 72a5d0d..a1addb7 100644 --- a/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out +++ b/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out @@ -38,76 +38,79 @@ union all select 2 as id from tb2 limit 1) b on a.id=b.id POSTHOOK: type: QUERY -Plan not optimized by CBO due to missing statistics. Please check log for more details. +Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Union 2 (CONTAINS) -Map 5 <- Union 2 (CONTAINS) -Reducer 3 <- Union 2 (SIMPLE_EDGE) -Reducer 4 <- Map 6 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Map 3 <- Union 4 (CONTAINS) +Map 6 <- Union 4 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 5 <- Union 4 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 llap - File Output Operator [FS_16] + Reducer 2 llap + File Output Operator [FS_17] compressed:false Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} -Merge Join Operator [MERGEJOIN_20] +Merge Join Operator [MERGEJOIN_21] | condition map:[{"":"Left Outer Join0 to 1"}] -| keys:{"0":"id (type: int)","1":"_col0 (type: int)"} +| keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"} | outputColumnNames:["_col0"] | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE -|<-Map 6 [SIMPLE_EDGE] llap -| Reduce Output Operator [RS_12] -| key expressions:id (type: int) -| Map-reduce partition columns:id (type: int) +|<-Map 1 [SIMPLE_EDGE] llap +| Reduce Output Operator [RS_13] +| key expressions:_col0 (type: int) +| 
Map-reduce partition columns:_col0 (type: int) | sort order:+ | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE -| TableScan [TS_11] -|alias:a +| Select Operator [SEL_1] +|outputColumnNames:["_col0"] |Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE -|<-Reducer 3 [SIMPLE_EDGE] llap - Reduce Output Operator [RS_13] +|TableScan [TS_0] +| alias:a +| Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +|<-Reducer 5 [SIMPLE_EDGE] llap + Reduce Output Operator [RS_14] key expressions:_col0 (type: int) Map-reduce partition columns:_col0 (type: int) sort order:+ Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Limit [LIM_10] + Limit [LIM_11] Number of rows:1 Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator [SEL_9] + Select Operator [SEL_10] | outputColumnNames:["_col0"] | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - |<-Union 2 [SIMPLE_EDGE] -|<-Map 1 [CONTAINS] llap -| Reduce Output Operator [RS_8] + |<-Union 4 [SIMPLE_EDGE] +|<-Map 3 [CONTAINS] llap +| Reduce Output Operator [RS_9] | sort order: | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE | value expressions:_col0 (type: int) -| Limit [LIM_7] +| Limit [LIM_8] |Number of rows:1 |Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE -|Select Operator [SEL_1] +|Select Operator [SEL_3] | outputColumnNames:["_col0"] | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE -|
[40/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/correlationoptimizer13.q.out -- diff --git a/ql/src/test/results/clientpositive/correlationoptimizer13.q.out b/ql/src/test/results/clientpositive/correlationoptimizer13.q.out index d652d87..8771f1c 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer13.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer13.q.out @@ -65,11 +65,11 @@ STAGE PLANS: Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c3 (type: string), c1 (type: int) -outputColumnNames: c3, c1 +outputColumnNames: _col0, _col1 Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: c3 (type: string), c1 (type: int) + keys: _col0 (type: string), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE @@ -112,7 +112,7 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 312 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: Join Operator @@ -159,26 +159,26 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan -alias: x1 +alias: x Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((c2 > 100) and c1 is not null) and c3 is not null) (type: boolean) - Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE + predicate: c2 > 100) and (c1 < 120)) and c1 is not null) and c3 is not null) (type: boolean) + Statistics: Num rows: 29 Data size: 647 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: c3 (type: string), c1 (type: int) -outputColumnNames: c3, c1 -Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE +outputColumnNames: _col0, _col1 +Statistics: Num rows: 29 Data size: 647 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: c3 (type: string), c1 (type: int) + keys: _col0 (type: string), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 647 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) -Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 29 Data size: 647 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -186,11 +186,11 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 -Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 14 Data size: 312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/correlationoptimizer4.q.out -- diff --git
[69/91] [abbrv] hive git commit: HIVE-12329 :Turn on limit pushdown optimization by default (Ashutosh Chauhan via Prasanth J)
http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/smb_mapjoin_15.q.out -- diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_15.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_15.q.out index 64a3ea2..8608187 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_15.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_15.q.out @@ -124,6 +124,8 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + tag: -1 +TopN: 10 +TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) auto parallelism: false Path -> Alias: @@ -382,6 +384,8 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + tag: -1 +TopN: 10 +TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) auto parallelism: false Path -> Alias: @@ -588,6 +592,8 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + tag: -1 +TopN: 10 +TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) auto parallelism: false Path -> Alias: @@ -827,6 +833,8 @@ STAGE PLANS: sort order: + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE tag: -1 +TopN: 10 +TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) auto parallelism: false Local Work: http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out index a234ff5..932fdcc 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out +++ 
b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out @@ -65,6 +65,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Reducer 3 Reduce Operator Tree: Select Operator @@ -238,6 +239,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Reducer 4 Reduce Operator Tree: Select Operator @@ -411,6 +413,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Reducer 4 Reduce Operator Tree: Select Operator http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out index a99cb74..84f68a3 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out @@ -262,6 +262,8 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE tag: -1 + TopN: 1 + TopN Hash Memory Usage: 0.1 auto parallelism: false Local Work: Map Reduce Local Work http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/spark/ctas.q.out
[59/91] [abbrv] hive git commit: HIVE-12411: Remove counter based stats collection mechanism (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/ab98ffc2/ql/src/test/results/clientpositive/stats_counter_partitioned.q.out -- diff --git a/ql/src/test/results/clientpositive/stats_counter_partitioned.q.out b/ql/src/test/results/clientpositive/stats_counter_partitioned.q.out deleted file mode 100644 index 626dcff..000 --- a/ql/src/test/results/clientpositive/stats_counter_partitioned.q.out +++ /dev/null @@ -1,465 +0,0 @@ -PREHOOK: query: -- partitioned table analyze - -create table dummy (key string, value string) partitioned by (ds string, hr string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dummy -POSTHOOK: query: -- partitioned table analyze - -create table dummy (key string, value string) partitioned by (ds string, hr string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dummy -PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12') -PREHOOK: type: LOAD - A masked pattern was here -PREHOOK: Output: default@dummy -POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12') -POSTHOOK: type: LOAD - A masked pattern was here -POSTHOOK: Output: default@dummy -POSTHOOK: Output: default@dummy@ds=2008/hr=12 -PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11') -PREHOOK: type: LOAD - A masked pattern was here -PREHOOK: Output: default@dummy -POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11') -POSTHOOK: type: LOAD - A masked pattern was here -POSTHOOK: Output: default@dummy -POSTHOOK: Output: default@dummy@ds=2008/hr=11 -PREHOOK: query: analyze table dummy partition (ds,hr) compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@dummy -PREHOOK: Input: default@dummy@ds=2008/hr=11 -PREHOOK: Input: default@dummy@ds=2008/hr=12 -PREHOOK: 
Output: default@dummy -PREHOOK: Output: default@dummy@ds=2008/hr=11 -PREHOOK: Output: default@dummy@ds=2008/hr=12 -POSTHOOK: query: analyze table dummy partition (ds,hr) compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dummy -POSTHOOK: Input: default@dummy@ds=2008/hr=11 -POSTHOOK: Input: default@dummy@ds=2008/hr=12 -POSTHOOK: Output: default@dummy -POSTHOOK: Output: default@dummy@ds=2008/hr=11 -POSTHOOK: Output: default@dummy@ds=2008/hr=12 -PREHOOK: query: describe formatted dummy partition (ds='2008', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@dummy -POSTHOOK: query: describe formatted dummy partition (ds='2008', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@dummy -# col_name data_type comment - -keystring -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008, 11] -Database: default -Table: dummy - A masked pattern was here -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles1 - numRows 500 - rawDataSize 5312 - totalSize 5812 - A masked pattern was here - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed:No -Num Buckets: -1 -Bucket Columns:[] -Sort Columns: [] -Storage Desc Params: - serialization.format1 -PREHOOK: query: describe formatted dummy partition (ds='2008', hr='12') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@dummy -POSTHOOK: query: describe formatted dummy partition (ds='2008', hr='12') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@dummy -# col_name data_type comment - -keystring -value string
[34/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out -- diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out index b0258b8..094f646 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out @@ -76,61 +76,66 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: p_name (type: string) -sort order: + -Map-reduce partition columns: p_name (type: string) + Select Operator +expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE -value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) +Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan alias: p2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 1 Data 
size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator -key expressions: p2_name (type: string) -sort order: + -Map-reduce partition columns: p2_name (type: string) + Select Operator +expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE -value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) +Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan alias: p3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator -key expressions: p3_name (type: string) -sort order: + -Map-reduce partition columns: p3_name (type: string) + Select Operator +expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE -value expressions: p3_partkey (type: int), p3_mfgr (type:
[33/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/join_filters_overlap.q.out -- diff --git a/ql/src/test/results/clientpositive/join_filters_overlap.q.out b/ql/src/test/results/clientpositive/join_filters_overlap.q.out index f52cf26..1d04f37 100644 --- a/ql/src/test/results/clientpositive/join_filters_overlap.q.out +++ b/ql/src/test/results/clientpositive/join_filters_overlap.q.out @@ -104,46 +104,58 @@ STAGE PLANS: alias: a Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false -Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) +Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: int) - auto parallelism: false + Reduce Output Operator +key expressions: _col0 (type: int) +sort order: + +Map-reduce partition columns: _col0 (type: int) +Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE +tag: 0 +value expressions: _col1 (type: int) +auto parallelism: false TableScan -alias: b +alias: a Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (value = 50) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: int) -sort order: + -Map-reduce partition columns: key (type: int) + Select Operator +expressions: key (type: int), 50 (type: int) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE -tag: 1 -value expressions: value (type: int) -auto parallelism: false +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: 
int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: int) + auto parallelism: false TableScan -alias: c +alias: a Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (value = 60) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: int) -sort order: + -Map-reduce partition columns: key (type: int) + Select Operator +expressions: key (type: int), 60 (type: int) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE -tag: 2 -value expressions: value (type: int) -auto parallelism: false +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + tag: 2 + value expressions: _col1 (type: int) + auto parallelism: false Path -> Alias: A masked pattern was here Path -> Partition: @@ -192,7 +204,7 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: -/a [a, b, c] +/a [$hdt$_0:a, $hdt$_1:a, $hdt$_2:a] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -206,37 +218,33 @@ STAGE PLANS: 1 2 keys: -0 key (type: int) -1 key (type: int) -2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 +0 _col0 (type: int) +1 _col0 (type: int) +
[09/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out index fed923c..4133fda 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out @@ -173,11 +173,15 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE -Spark HashTable Sink Operator - keys: -0 key (type: int) -1 key (type: int) - Position of Big Table: 1 +Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator +keys: + 0 _col0 (type: int) + 1 _col0 (type: int) +Position of Big Table: 1 Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -252,48 +256,52 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE -Map Join Operator - condition map: - Inner Join 0 to 1 - keys: -0 key (type: int) -1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: -0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator -expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) -outputColumnNames: _col0, _col1, _col2 +Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Map Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 _col0 (type: 
int) + 1 _col0 (type: int) +outputColumnNames: _col0, _col1, _col3 +input vertices: + 0 Map 1 +Position of Big Table: 1 Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false - GlobalTableId: 1 - A masked pattern was here - NumFilesPerFileSink: 1 +BucketMapJoin: true +Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - A masked pattern was here - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: -bucket_count -1 -columns key,value1,value2 -columns.comments -columns.types string:string:string - A masked pattern was here -name default.bucketmapjoin_tmp_result -serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} -serialization.format 1 -serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - A masked pattern was here -
[04/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out index 5a77830..2eb0c3b 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out @@ -79,12 +79,16 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) +Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reduce Output Operator +key expressions: _col1 (type: string) +sort order: + +Map-reduce partition columns: _col1 (type: string) +Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE +value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 3 Map Operator Tree: TableScan @@ -93,12 +97,16 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 1 Data 
size: 0 Basic stats: PARTIAL Column stats: NONE -Reduce Output Operator - key expressions: p2_name (type: string) - sort order: + - Map-reduce partition columns: p2_name (type: string) +Select Operator + expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Reduce Output Operator +key expressions: _col1 (type: string) +sort order: + +Map-reduce partition columns: _col1 (type: string) +Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 4 Map Operator Tree: TableScan @@ -107,12 +115,16 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE -Reduce Output Operator - key expressions: p3_name (type: string) - sort order: + - Map-reduce partition columns: p3_name (type: string) +Select Operator + expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) +
[18/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/skewjoin.q.out -- diff --git a/ql/src/test/results/clientpositive/skewjoin.q.out b/ql/src/test/results/clientpositive/skewjoin.q.out index 22a9421..13c4470 100644 --- a/ql/src/test/results/clientpositive/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/skewjoin.q.out @@ -101,15 +101,14 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string), value (type: string) -outputColumnNames: _col0, _col1 +expressions: key (type: string) +outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -117,14 +116,15 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), value (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -133,10 +133,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: UDFToInteger(_col2) (type: int), _col1 (type: string) +expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -175,9 +175,9 @@ STAGE PLANS: keys: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Select Operator -expressions: UDFToInteger(_col2) (type: int), _col1 (type: string) +expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -595,15 +595,14 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string), value (type: string) -outputColumnNames: _col0, _col1 +expressions: key (type: string) +outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -611,14 +610,15 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
[02/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out b/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out index 0f9d1ae..7ac16d0 100644 --- a/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out +++ b/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out @@ -109,14 +109,18 @@ STAGE PLANS: alias: a Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator -key expressions: key (type: int) -sort order: + -Map-reduce partition columns: key (type: int) + Select Operator +expressions: key (type: int), value (type: int) +outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE -tag: 0 -value expressions: value (type: int) -auto parallelism: false +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: int) + auto parallelism: false Path -> Alias: A masked pattern was here Path -> Partition: @@ -169,21 +173,25 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (value = 50) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) +Select Operator + expressions: key (type: int), 50 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value 
(type: int) - auto parallelism: false + Reduce Output Operator +key expressions: _col0 (type: int) +sort order: + +Map-reduce partition columns: _col0 (type: int) +Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE +tag: 1 +value expressions: _col1 (type: int) +auto parallelism: false Path -> Alias: A masked pattern was here Path -> Partition: @@ -232,25 +240,29 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [b] + /a [a] Map 4 Map Operator Tree: TableScan - alias: c + alias: a Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (value = 60) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) +Select Operator + expressions: key (type: int), 60 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - tag: 2 - value expressions: value (type: int) - auto parallelism: false + Reduce Output Operator +key expressions: _col0 (type: int) +
[66/91] [abbrv] hive git commit: HIVE-12331 : Remove hive.enforce.bucketing & hive.enforce.sorting configs (Ashutosh Chauhan via Jason Dere)
http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_all_non_partitioned.q -- diff --git a/ql/src/test/queries/clientpositive/delete_all_non_partitioned.q b/ql/src/test/queries/clientpositive/delete_all_non_partitioned.q index 9110dcc..82c18e2 100644 --- a/ql/src/test/queries/clientpositive/delete_all_non_partitioned.q +++ b/ql/src/test/queries/clientpositive/delete_all_non_partitioned.q @@ -1,6 +1,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set hive.enforce.bucketing=true; + create table acid_danp(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_all_partitioned.q -- diff --git a/ql/src/test/queries/clientpositive/delete_all_partitioned.q b/ql/src/test/queries/clientpositive/delete_all_partitioned.q index f082b6d..122b3e2 100644 --- a/ql/src/test/queries/clientpositive/delete_all_partitioned.q +++ b/ql/src/test/queries/clientpositive/delete_all_partitioned.q @@ -1,6 +1,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set hive.enforce.bucketing=true; + create table acid_dap(a int, b varchar(128)) partitioned by (ds string) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_orig_table.q -- diff --git a/ql/src/test/queries/clientpositive/delete_orig_table.q b/ql/src/test/queries/clientpositive/delete_orig_table.q index fd23f4b..88cc830 100644 --- a/ql/src/test/queries/clientpositive/delete_orig_table.q +++ b/ql/src/test/queries/clientpositive/delete_orig_table.q @@ -1,6 +1,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set 
hive.enforce.bucketing=true; + dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/delete_orig_table; dfs -copyFromLocal ../../data/files/alltypesorc ${system:test.tmp.dir}/delete_orig_table/0_0; http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_tmp_table.q -- diff --git a/ql/src/test/queries/clientpositive/delete_tmp_table.q b/ql/src/test/queries/clientpositive/delete_tmp_table.q index eb6c095..c7d8aa6 100644 --- a/ql/src/test/queries/clientpositive/delete_tmp_table.q +++ b/ql/src/test/queries/clientpositive/delete_tmp_table.q @@ -1,6 +1,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set hive.enforce.bucketing=true; + create temporary table acid_dtt(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_where_no_match.q -- diff --git a/ql/src/test/queries/clientpositive/delete_where_no_match.q b/ql/src/test/queries/clientpositive/delete_where_no_match.q index 8ed979d..f13dd73 100644 --- a/ql/src/test/queries/clientpositive/delete_where_no_match.q +++ b/ql/src/test/queries/clientpositive/delete_where_no_match.q @@ -1,6 +1,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set hive.enforce.bucketing=true; + create table acid_dwnm(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_where_non_partitioned.q -- diff --git a/ql/src/test/queries/clientpositive/delete_where_non_partitioned.q b/ql/src/test/queries/clientpositive/delete_where_non_partitioned.q index dac5375..de1ca36 100644 --- a/ql/src/test/queries/clientpositive/delete_where_non_partitioned.q +++ 
b/ql/src/test/queries/clientpositive/delete_where_non_partitioned.q @@ -1,6 +1,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set hive.enforce.bucketing=true; + create table acid_dwnp(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_where_partitioned.q -- diff --git
[85/91] [abbrv] hive git commit: HIVE-12008: Hive queries failing when using count(*) on column in view (Yongzhi Chen, reviewed by Szehon ho)
http://git-wip-us.apache.org/repos/asf/hive/blob/a51e5d4e/ql/src/test/results/clientpositive/union9.q.out -- diff --git a/ql/src/test/results/clientpositive/union9.q.out b/ql/src/test/results/clientpositive/union9.q.out index ec7ab35..5f54210 100644 --- a/ql/src/test/results/clientpositive/union9.q.out +++ b/ql/src/test/results/clientpositive/union9.q.out @@ -24,73 +24,67 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s1 -Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Union -Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator aggregations: count(1) mode: hash outputColumnNames: _col0 -Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) TableScan alias: s1 -Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data 
size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Union -Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator aggregations: count(1) mode: hash outputColumnNames: _col0 -Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) TableScan alias: s1 -Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Union -Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
[77/91] [abbrv] hive git commit: HIVE-12473 : DPP: UDFs on the partition column side does not evaluate correctly (Sergey Shelukhin, reviewed by Gopal V)
HIVE-12473 : DPP: UDFs on the partition column side does not evaluate correctly (Sergey Shelukhin, reviewed by Gopal V) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/454c2cae Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/454c2cae Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/454c2cae Branch: refs/heads/spark Commit: 454c2cae4f0a3db1f225da6c283c161fe09bfb87 Parents: 74c0c97 Author: Sergey ShelukhinAuthored: Wed Nov 25 12:15:29 2015 -0800 Committer: Sergey Shelukhin Committed: Wed Nov 25 12:15:29 2015 -0800 -- .../ql/exec/tez/DynamicPartitionPruner.java | 40 ++-- 1 file changed, 29 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/454c2cae/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicPartitionPruner.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicPartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicPartitionPruner.java index b67ac8d..60b71aa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicPartitionPruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicPartitionPruner.java @@ -18,6 +18,12 @@ package org.apache.hadoop.hive.ql.exec.tez; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; + +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; + +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; + import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; @@ -242,25 +248,37 @@ public class DynamicPartitionPruner { LOG.debug(sb.toString()); } -ObjectInspector oi = - PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(TypeInfoFactory -.getPrimitiveTypeInfo(si.fieldInspector.getTypeName())); +ObjectInspector targetOi = findTargetOi(si.partKey, si.columnName); +Converter converter = ObjectInspectorConverters.getConverter( +PrimitiveObjectInspectorFactory.javaStringObjectInspector, 
targetOi); -Converter converter = -ObjectInspectorConverters.getConverter( -PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi); - -StructObjectInspector soi = -ObjectInspectorFactory.getStandardStructObjectInspector( -Collections.singletonList(columnName), Collections.singletonList(oi)); +StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector( +Collections.singletonList(columnName), Collections.singletonList(targetOi)); @SuppressWarnings("rawtypes") ExprNodeEvaluator eval = ExprNodeEvaluatorFactory.get(si.partKey); -eval.initialize(soi); +eval.initialize(soi); // We expect the row with just the relevant column. applyFilterToPartitions(converter, eval, columnName, values); } + private ObjectInspector findTargetOi(ExprNodeDesc expr, String columnName) { +if (expr instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colExpr = (ExprNodeColumnDesc)expr; + // TODO: this is not necessarily going to work for all cases. At least, table name is needed. + // Also it's not clear if this is going to work with subquery columns and such. + if (columnName.equals(colExpr.getColumn())) { +return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( +(PrimitiveTypeInfo)colExpr.getTypeInfo()); + } +} +for (ExprNodeDesc child : expr.getChildren()) { + ObjectInspector oi = findTargetOi(child, columnName); + if (oi != null) return oi; +} +return null; + } + @SuppressWarnings("rawtypes") private void applyFilterToPartitions(Converter converter, ExprNodeEvaluator eval, String columnName, Set values) throws HiveException {
[91/91] [abbrv] hive git commit: Merge branch 'master' into spark
Merge branch 'master' into spark Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/79035f1c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/79035f1c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/79035f1c Branch: refs/heads/spark Commit: 79035f1c520bbc9a900723b5b9c065b67fde636d Parents: eddb8ca 0a96201 Author: Xuefu Zhang <xzh...@cloudera.com> Authored: Sun Nov 29 16:05:29 2015 -0800 Committer: Xuefu Zhang <xzh...@cloudera.com> Committed: Sun Nov 29 16:05:29 2015 -0800 -- accumulo-handler/pom.xml|8 +- ant/pom.xml |6 + .../java/org/apache/hive/beeline/BeeLine.java | 13 +- .../apache/hive/beeline/cli/TestHiveCli.java|1 + beeline/src/test/resources/hive-site.xml|5 + bin/ext/version.sh |2 +- bin/hive| 23 +- .../org/apache/hadoop/hive/cli/CliDriver.java | 11 + common/pom.xml | 28 +- .../hadoop/hive/common/StatsSetupConst.java | 13 - .../hive/common/metrics/LegacyMetrics.java | 27 +- .../hive/common/metrics/common/Metrics.java | 28 +- .../common/metrics/common/MetricsConstant.java |6 + .../common/metrics/common/MetricsScope.java | 33 + .../metrics/metrics2/CodahaleMetrics.java | 41 +- .../org/apache/hadoop/hive/conf/HiveConf.java | 96 +- .../apache/hadoop/hive/ql/log/PerfLogger.java | 27 + .../apache/hive/common/util/BloomFilter.java| 18 +- .../org/apache/hive/common/util/Murmur3.java| 107 +- .../hive/http/AdminAuthorizedServlet.java | 45 + .../java/org/apache/hive/http/ConfServlet.java | 101 + .../java/org/apache/hive/http/HttpServer.java | 316 ++ .../org/apache/hive/http/JMXJsonServlet.java| 412 +++ .../hive/common/metrics/MetricsTestUtils.java | 61 + .../hive/common/metrics/TestLegacyMetrics.java | 46 +- .../metrics/metrics2/TestCodahaleMetrics.java | 14 +- .../apache/hive/common/util/TestMurmur3.java| 45 +- data/conf/hive-site.xml |5 + data/conf/llap/hive-site.xml|7 +- data/conf/spark/standalone/hive-site.xml|7 +- data/conf/spark/yarn-client/hive-site.xml |6 +- 
data/conf/tez/hive-site.xml |9 +- .../hive/hbase/HiveHBaseTableInputFormat.java | 10 + .../test/results/positive/hbase_queries.q.out | 13 +- .../src/test/templates/TestHBaseCliDriver.vm| 63 +- .../templates/TestHBaseNegativeCliDriver.vm | 64 +- .../mapreduce/FosterStorageHandler.java | 37 + .../hive/hcatalog/mapreduce/InputJobInfo.java |8 +- .../rcfile/RCFileMapReduceInputFormat.java |8 +- .../rcfile/TestRCFileMapReduceInputFormat.java |4 +- .../streaming/AbstractRecordWriter.java | 51 +- .../hcatalog/streaming/ConnectionError.java |3 +- .../streaming/DelimitedInputWriter.java |5 +- .../hive/hcatalog/streaming/HiveEndPoint.java | 211 +- .../hcatalog/streaming/StrictJsonWriter.java| 11 +- .../hcatalog/streaming/TransactionBatch.java|1 + .../hcatalog/streaming/TransactionError.java|2 +- .../hive/hcatalog/streaming/TestStreaming.java | 175 +- .../streaming/mutate/StreamingAssert.java |2 + hplsql/pom.xml |5 - itests/hive-unit/pom.xml|7 + ...estDDLWithRemoteMetastoreSecondNamenode.java |3 +- .../hive/ql/txn/compactor/TestCompactor.java| 246 +- .../org/apache/hive/jdbc/TestJdbcDriver2.java | 89 +- .../hive/jdbc/miniHS2/TestHs2Metrics.java | 116 + itests/qtest-accumulo/pom.xml |2 +- .../test/resources/testconfiguration.properties | 23 +- .../hadoop/hive/hbase/HBaseTestSetup.java |9 +- .../org/apache/hadoop/hive/ql/QTestUtil.java| 11 +- jdbc/pom.xml| 52 + .../hive/llap/io/api/impl/LlapInputFormat.java | 40 +- .../hadoop/hive/metastore/HiveMetaStore.java|9 +- .../hive/metastore/MetaStoreDirectSql.java | 12 + .../metastore/txn/CompactionTxnHandler.java | 170 +- .../hadoop/hive/metastore/txn/TxnHandler.java | 533 +++- .../metastore/txn/TestCompactionTxnHandler.java | 37 - .../hive/metastore/txn/TestTxnHandler.java | 10 +- packaging/pom.xml | 15 +- pom.xml | 15 +- ql/pom.xml | 42 +- .../java/org/apache/hadoop/hive/ql/Driver.java | 30 +- .../org/apache/hadoop/hive/ql/ErrorMsg.java |8 +- .../org/apache/hadoop/hive/ql/exec/DDLTask.java |5 +- 
.../hadoop/hive/ql/exec/FetchOperator.java | 10 +- .../a
[11/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out index 23530bd..ff57c08 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out @@ -142,26 +142,30 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE -Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: -0 key (type: string) -1 key (type: string) - Position of Big Table: 1 - Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Group By Operator -aggregations: count() -mode: hash -outputColumnNames: _col0 -Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - sort order: +Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 _col0 (type: string) + 1 _col0 (type: string) +Position of Big Table: 1 +Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE +BucketMapJoin: true +Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Reduce Output Operator +sort order: +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +tag: -1 +value expressions: _col0 (type: bigint) +auto parallelism: false Path -> Alias: A masked pattern was 
here Path -> Partition: @@ -214,7 +218,7 @@ STAGE PLANS: name: default.bucket_big name: default.bucket_big Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-08 [$hdt$_1:b] Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -326,26 +330,30 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE -Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: -0 key (type: string) -1 key (type: string) - Position of Big Table: 0 - Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Group By Operator -aggregations: count() -mode: hash -outputColumnNames: _col0 -Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - sort order: +Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator +condition
[51/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7dab21ac Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7dab21ac Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7dab21ac Branch: refs/heads/spark Commit: 7dab21acffdd43e80e6fefb5011139bbf30fa541 Parents: 55b589e Author: Jesus Camacho Rodriguez Authored: Sat Nov 14 09:12:49 2015 +0100 Committer: Jesus Camacho Rodriguez Committed: Fri Nov 20 23:31:11 2015 +0100 -- .../test/results/positive/hbase_queries.q.out | 12 +- .../ql/optimizer/calcite/HiveRelOptUtil.java| 40 + .../rules/HiveAggregateProjectMergeRule.java| 13 +- .../translator/SqlFunctionConverter.java|8 + .../hadoop/hive/ql/parse/CalcitePlanner.java| 169 +- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 16 - ql/src/test/queries/clientpositive/mergejoin.q |9 + .../bucket_mapjoin_mismatch1.q.out | 36 +- .../clientnegative/join_nonexistent_part.q.out |1 - .../annotate_stats_join_pkfk.q.out | 230 +- .../archive_excludeHadoop20.q.out |1 + .../results/clientpositive/archive_multi.q.out |1 + .../results/clientpositive/auto_join1.q.out | 12 +- .../results/clientpositive/auto_join10.q.out| 12 +- .../results/clientpositive/auto_join11.q.out| 12 +- .../results/clientpositive/auto_join14.q.out|8 +- .../results/clientpositive/auto_join24.q.out| 54 +- .../results/clientpositive/auto_join26.q.out| 30 +- .../results/clientpositive/auto_join32.q.out| 153 +- .../clientpositive/auto_join_filters.q.out | 12 +- .../clientpositive/auto_join_nulls.q.out|2 +- .../auto_join_reordering_values.q.out | 152 +- .../clientpositive/auto_join_stats.q.out| 392 ++-- .../clientpositive/auto_join_stats2.q.out | 214 +- .../clientpositive/auto_smb_mapjoin_14.q.out| 145 +- .../clientpositive/auto_sortmerge_join_1.q.out | 240 +- 
.../clientpositive/auto_sortmerge_join_10.q.out | 103 +- .../clientpositive/auto_sortmerge_join_11.q.out | 124 +- .../clientpositive/auto_sortmerge_join_12.q.out | 177 +- .../clientpositive/auto_sortmerge_join_14.q.out | 152 +- .../clientpositive/auto_sortmerge_join_15.q.out | 152 +- .../clientpositive/auto_sortmerge_join_2.q.out | 198 +- .../clientpositive/auto_sortmerge_join_3.q.out | 234 +- .../clientpositive/auto_sortmerge_join_4.q.out | 234 +- .../clientpositive/auto_sortmerge_join_5.q.out | 232 +- .../clientpositive/auto_sortmerge_join_6.q.out | 1140 -- .../clientpositive/auto_sortmerge_join_7.q.out | 242 +- .../clientpositive/auto_sortmerge_join_8.q.out | 242 +- .../clientpositive/auto_sortmerge_join_9.q.out | 1240 +- .../clientpositive/bucket_map_join_spark1.q.out | 206 +- .../clientpositive/bucket_map_join_spark2.q.out | 206 +- .../clientpositive/bucket_map_join_spark3.q.out | 206 +- .../clientpositive/bucket_map_join_spark4.q.out | 236 +- .../bucketsortoptimize_insert_2.q.out | 218 +- .../bucketsortoptimize_insert_4.q.out | 112 +- .../bucketsortoptimize_insert_5.q.out | 142 +- .../bucketsortoptimize_insert_6.q.out | 554 +++-- .../bucketsortoptimize_insert_7.q.out | 48 +- .../bucketsortoptimize_insert_8.q.out | 76 +- .../cbo_rp_cross_product_check_2.q.out | 468 ++-- .../results/clientpositive/cbo_rp_join1.q.out | 60 +- .../clientpositive/cbo_rp_lineage2.q.out| 18 +- .../clientpositive/column_access_stats.q.out| 84 +- .../results/clientpositive/constprog2.q.out | 20 +- .../clientpositive/constprog_partitioner.q.out | 10 +- .../clientpositive/correlationoptimizer1.q.out | 886 .../clientpositive/correlationoptimizer11.q.out | 120 +- .../clientpositive/correlationoptimizer13.q.out | 26 +- .../clientpositive/correlationoptimizer4.q.out | 972 .../clientpositive/correlationoptimizer5.q.out | 248 +- .../clientpositive/correlationoptimizer9.q.out | 250 +- .../results/clientpositive/create_view.q.out|2 +- .../clientpositive/cross_product_check_1.q.out | 252 ++- 
.../clientpositive/cross_product_check_2.q.out | 332 +-- .../results/clientpositive/decimal_join2.q.out | 90 +- .../clientpositive/dynamic_rdd_cache.q.out | 265 ++- .../encryption_join_unencrypted_tbl.q.out | 98 +- ...on_join_with_different_encryption_keys.q.out | 102 +- .../clientpositive/explain_logical.q.out| 142 +- .../clientpositive/explain_rearrange.q.out | 288 +-- .../clientpositive/filter_join_breaktask.q.out | 86 +-