svn commit: r1838505 - /hive/cms/trunk/content/people.mdtext
Author: xuefu
Date: Mon Aug 20 22:09:32 2018
New Revision: 1838505

URL: http://svn.apache.org/viewvc?rev=1838505&view=rev
Log:
Update Xuefu's org in the committer list

Modified:
    hive/cms/trunk/content/people.mdtext

Modified: hive/cms/trunk/content/people.mdtext
URL: http://svn.apache.org/viewvc/hive/cms/trunk/content/people.mdtext?rev=1838505&r1=1838504&r2=1838505&view=diff
==============================================================================
--- hive/cms/trunk/content/people.mdtext (original)
+++ hive/cms/trunk/content/people.mdtext Mon Aug 20 22:09:32 2018
@@ -334,7 +334,7 @@ tr:nth-child(2n+1) {
   xuefu
   Xuefu Zhang
-
+https://www.alibaba.com/">Alibaba Inc
hive git commit: HIVE-17257: Hive should merge empty files (Chao via Xuefu)
Repository: hive
Updated Branches:
  refs/heads/master ad1243bef -> 9816cfb44

HIVE-17257: Hive should merge empty files (Chao via Xuefu)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9816cfb4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9816cfb4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9816cfb4

Branch: refs/heads/master
Commit: 9816cfb44ad91a8c2a030e540a703983862e4123
Parents: ad1243b
Author: Xuefu Zhang
Authored: Thu Jan 18 11:26:54 2018 -0800
Committer: Xuefu Zhang
Committed: Thu Jan 18 11:26:54 2018 -0800

----------------------------------------------------------------------
 .../ql/plan/ConditionalResolverMergeFiles.java |  2 +-
 .../test/queries/clientpositive/merge_empty.q  | 14 ++
 .../results/clientpositive/merge_empty.q.out   | 45
 3 files changed, 60 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/9816cfb4/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
index 129347b..ebf2298 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
@@ -408,7 +408,7 @@ public class ConditionalResolverMergeFiles implements ConditionalResolver,
    */
   private long getMergeSize(FileSystem inpFs, Path dirPath, long avgSize) {
     AverageSize averageSize = getAverageSize(inpFs, dirPath);
-    if (averageSize.getTotalSize() <= 0) {
+    if (averageSize.getTotalSize() < 0) {
       return -1;
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/9816cfb4/ql/src/test/queries/clientpositive/merge_empty.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/merge_empty.q b/ql/src/test/queries/clientpositive/merge_empty.q
new file mode 100644
index 000..188b39e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/merge_empty.q
@@ -0,0 +1,14 @@
+set hive.merge.mapredfiles=true;
+set hive.merge.sparkfiles=true;
+set hive.auto.convert.join=false;
+set mapreduce.job.reduces=1000;
+
+create table dummy (a string);
+insert overwrite directory '/tmp/test' select src.key from src join dummy on src.key = dummy.a;
+dfs -ls /tmp/test;
+
+-- verify that this doesn't merge for bucketed tables
+create table foo (a bigint, b string) clustered by (a) into 256 buckets;
+create table bar (a bigint, b string);
+insert overwrite table foo select * from bar;
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/foo;

http://git-wip-us.apache.org/repos/asf/hive/blob/9816cfb4/ql/src/test/results/clientpositive/merge_empty.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/merge_empty.q.out b/ql/src/test/results/clientpositive/merge_empty.q.out
new file mode 100644
index 000..c13cbf4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/merge_empty.q.out
@@ -0,0 +1,45 @@
+PREHOOK: query: create table dummy (a string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dummy
+POSTHOOK: query: create table dummy (a string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dummy
+#### A masked pattern was here ####
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dummy
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dummy
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+PREHOOK: query: create table foo (a bigint, b string) clustered by (a) into 256 buckets
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo
+POSTHOOK: query: create table foo (a bigint, b string) clustered by (a) into 256 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo
+PREHOOK: query: create table bar (a bigint, b string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@bar
+POSTHOOK: query: create table bar (a bigint, b string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@bar
+PREHOOK: query: insert overwrite table foo select * from bar
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bar
+PREHOOK: Output: default@foo
+POSTHOOK: query: insert overwrite table foo select * from bar
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bar
+POSTHOOK: Output: default@foo
+POSTHOOK: Lineage: foo.a SIMPLE [(bar)bar.FieldSchema(name:a, type:bigint, co
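The one-character change in getMergeSize above shifts the meaning of the guard: a directory whose files total zero bytes (all empty) used to return -1 and skip merging, while now only a negative total, the error sentinel returned by getAverageSize, aborts. A minimal standalone sketch of that boundary, using simplified stand-ins rather than Hive's AverageSize plumbing:

    // Minimal sketch of the boundary change in getMergeSize; the names and
    // types here are simplified stand-ins, not Hive's internal API.
    public class MergeSizeCheck {

      /** Returns -1 to signal "an error occurred while sizing the directory". */
      static long totalSizeOf(long[] fileSizes) {
        long total = 0;
        for (long s : fileSizes) {
          if (s < 0) return -1; // propagate the error sentinel
          total += s;
        }
        return total;
      }

      /** Returns -1 when merging should be skipped, else the size to merge. */
      static long mergeSize(long[] fileSizes) {
        long total = totalSizeOf(fileSizes);
        // Before HIVE-17257 this read "total <= 0", so a directory of empty
        // files (total == 0) was never merged; now only the error case bails.
        if (total < 0) {
          return -1;
        }
        return total;
      }

      public static void main(String[] args) {
        System.out.println(mergeSize(new long[] {0, 0, 0})); // 0: empty files now mergeable
        System.out.println(mergeSize(new long[] {0, -1}));   // -1: sizing error, skip merge
      }
    }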
[4/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_null_agg.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/subquery_null_agg.q.out b/ql/src/test/results/clientpositive/spark/subquery_null_agg.q.out new file mode 100644 index 000..144c3ec --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/subquery_null_agg.q.out @@ -0,0 +1,182 @@ +PREHOOK: query: CREATE TABLE table_7 (int_col INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_7 +POSTHOOK: query: CREATE TABLE table_7 (int_col INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_7 +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product +PREHOOK: query: explain +SELECT +(t1.int_col) * (t1.int_col) AS int_col +FROM ( +SELECT +MIN(NULL) OVER () AS int_col +FROM table_7 +) t1 +WHERE +(False) NOT IN (SELECT +False AS boolean_col +FROM ( +SELECT +MIN(NULL) OVER () AS int_col +FROM table_7 +) tt1 +WHERE +(t1.int_col) = (tt1.int_col)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT +(t1.int_col) * (t1.int_col) AS int_col +FROM ( +SELECT +MIN(NULL) OVER () AS int_col +FROM table_7 +) t1 +WHERE +(False) NOT IN (SELECT +False AS boolean_col +FROM ( +SELECT +MIN(NULL) OVER () AS int_col +FROM table_7 +) tt1 +WHERE +(t1.int_col) = (tt1.int_col)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Spark + Edges: +Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) +Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) +Reducer 5 <- Map 4 (GROUP, 1) +Reducer 7 <- Map 6 (GROUP, 2) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: table_7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator +Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE +Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE +Map 4 +Map Operator Tree: +TableScan + alias: table_7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator +Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE +Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator +aggregations: count() +mode: hash +outputColumnNames: _col0 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) +Map 6 +Map Operator Tree: +TableScan + alias: table_7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator +Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE +Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator +keys: false (type: boolean) +mode: hash +outputColumnNames: _col0 +Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE +Reduce Output Operator + key expressions: _col0 (type: boolean) + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE +Reducer 2 +Reduce Operator Tree: + Join Operator +
[6/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_nested_subquery.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_nested_subquery.q.out b/ql/src/test/results/clientpositive/spark/subquery_nested_subquery.q.out
new file mode 100644
index 000..15f33f0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/subquery_nested_subquery.q.out
@@ -0,0 +1,38 @@
+PREHOOK: query: select *
+from part x
+where x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where y.p_name = z.p_name))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from part x
+where x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where y.p_name = z.p_name))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
+90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
+85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
+42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl
+105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ
+48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i
+86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
+15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu
+45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful
+65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr
+132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even
+195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
+17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
+33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful
+78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the
+49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick
+112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
+40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s
+144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about
+110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously
+155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
+191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
+146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
[8/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)
HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8eaf18d5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8eaf18d5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8eaf18d5

Branch: refs/heads/master
Commit: 8eaf18d599909751efc4bb1e05d31e65da8a8d1e
Parents: 1253450
Author: Xuefu Zhang
Authored: Fri Oct 13 10:03:35 2017 -0700
Committer: Xuefu Zhang
Committed: Fri Oct 13 10:03:35 2017 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |    8 +
 .../clientpositive/spark/subquery_multi.q.out   | 4129 ++
 .../spark/subquery_nested_subquery.q.out        |   38 +
 .../clientpositive/spark/subquery_notin.q.out   | 7722 ++
 .../spark/subquery_null_agg.q.out               |  182 +
 .../clientpositive/spark/subquery_scalar.q.out  | 6619 +++
 .../clientpositive/spark/subquery_select.q.out  | 5379
 .../spark/subquery_shared_alias.q.out           |   23 +
 .../clientpositive/spark/subquery_views.q.out   |  598 ++
 9 files changed, 24698 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 1a7c0d2..65cd79a 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1266,7 +1266,15 @@ spark.query.files=add_part_multiple.q, \
   statsfs.q, \
   subquery_exists.q, \
   subquery_in.q, \
+  subquery_multi.q,\
   subquery_multiinsert.q, \
+  subquery_nested_subquery.q, \
+  subquery_notin.q,\
+  subquery_null_agg.q,\
+  subquery_scalar.q,\
+  subquery_select.q, \
+  subquery_shared_alias.q, \
+  subquery_views.q,\
   table_access_keys_stats.q, \
   temp_table.q, \
   temp_table_gb1.q, \
[3/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out new file mode 100644 index 000..470efca --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out @@ -0,0 +1,6619 @@ +PREHOOK: query: create table tnull(i int, c char(2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tnull +POSTHOOK: query: create table tnull(i int, c char(2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tnull +PREHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@tnull +POSTHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tnull +POSTHOOK: Lineage: tnull.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: tnull.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table tempty(c char(2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tempty +POSTHOOK: query: create table tempty(c char(2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tempty +PREHOOK: query: CREATE TABLE part_null( +p_partkey INT, +p_name STRING, +p_mfgr STRING, +p_brand STRING, +p_type STRING, +p_size INT, +p_container STRING, +p_retailprice DOUBLE, +p_comment STRING +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY "," +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_null +POSTHOOK: query: CREATE TABLE part_null( +p_partkey INT, +p_name STRING, +p_mfgr STRING, +p_brand STRING, +p_type STRING, +p_size INT, +p_container STRING, +p_retailprice DOUBLE, +p_comment STRING +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY "," +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_null +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@part_null +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@part_null +PREHOOK: query: insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith') +PREHOOK: type: QUERY +PREHOOK: Output: default@part_null +POSTHOOK: query: insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@part_null +POSTHOOK: Lineage: part_null.p_brand SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_comment SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col9, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_container SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col7, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_mfgr SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_name SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_partkey EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_retailprice EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col8, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_size EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_type SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: explain select * from part where p_size > (select avg(p_size) from part_null) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where p_size > (select avg(p_size) from part_null) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Sta
[5/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_notin.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out new file mode 100644 index 000..d7b9a41 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out @@ -0,0 +1,7722 @@ +Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: explain +select * +from src +where src.key not in + ( select key from src s1 +where s1.key > '2' + ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src +where src.key not in + ( select key from src s1 +where s1.key > '2' + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Spark + Edges: +Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) +Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +Reducer 5 <- Map 4 (GROUP, 1) +Reducer 7 <- Map 6 (GROUP, 2) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: key (type: string), value (type: string) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) +Map 4 +Map Operator Tree: +TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator +predicate: (key > '2') (type: boolean) +Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE +Group By Operator + aggregations: count(), count(key) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +sort order: +Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE +value expressions: _col0 (type: bigint), _col1 (type: bigint) +Map 6 +Map Operator Tree: +TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator +predicate: (key > '2') (type: boolean) +Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE +Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE +Reducer 2 +Reduce Operator Tree: + Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 + 1 +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 500 Data size: 13812 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 13812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint) +Reducer 
3 +Reduce Operator Tree: + Join Operator +condition map: +
[7/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_multi.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out new file mode 100644 index 000..f9b2c1b --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out @@ -0,0 +1,4129 @@ +PREHOOK: query: create table tnull(i int, c char(2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tnull +POSTHOOK: query: create table tnull(i int, c char(2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tnull +PREHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@tnull +POSTHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tnull +POSTHOOK: Lineage: tnull.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: tnull.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table tempty(c char(2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tempty +POSTHOOK: query: create table tempty(c char(2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tempty +PREHOOK: query: CREATE TABLE part_null( +p_partkey INT, +p_name STRING, +p_mfgr STRING, +p_brand STRING, +p_type STRING, +p_size INT, +p_container STRING, +p_retailprice DOUBLE, +p_comment STRING +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY "," +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_null +POSTHOOK: query: CREATE TABLE part_null( +p_partkey INT, +p_name STRING, +p_mfgr STRING, +p_brand STRING, +p_type STRING, +p_size INT, +p_container STRING, +p_retailprice DOUBLE, +p_comment STRING +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY "," +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_null +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@part_null +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@part_null +PREHOOK: query: insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith') +PREHOOK: type: QUERY +PREHOOK: Output: default@part_null +POSTHOOK: query: insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@part_null +POSTHOOK: Lineage: part_null.p_brand SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_comment SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col9, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_container SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col7, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_mfgr SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_name SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_partkey EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_retailprice EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col8, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_size EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_type SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +PREHOOK: query: explain select * from part_null where p_size IN (select p_size from part_null) AND p_brand IN (select p_brand from part_null) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part_null where p_size IN (select p_size from part_null) AND p_brand IN (select p_brand from part_null) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depe
[1/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)
Repository: hive Updated Branches: refs/heads/master 1253450e0 -> 8eaf18d59 http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_shared_alias.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/subquery_shared_alias.q.out b/ql/src/test/results/clientpositive/spark/subquery_shared_alias.q.out new file mode 100644 index 000..f907f91 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/subquery_shared_alias.q.out @@ -0,0 +1,23 @@ +PREHOOK: query: select * +from src +where src.key in (select key from src where key > '9') +PREHOOK: type: QUERY +PREHOOK: Input: default@src + A masked pattern was here +POSTHOOK: query: select * +from src +where src.key in (select key from src where key > '9') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src + A masked pattern was here +92 val_92 +96 val_96 +97 val_97 +97 val_97 +90 val_90 +90 val_90 +90 val_90 +95 val_95 +95 val_95 +98 val_98 +98 val_98 http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_views.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/subquery_views.q.out b/ql/src/test/results/clientpositive/spark/subquery_views.q.out new file mode 100644 index 000..9a1c25f --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/subquery_views.q.out @@ -0,0 +1,598 @@ +PREHOOK: query: create view cv1 as +select * +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9') +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@cv1 +POSTHOOK: query: create view cv1 as +select * +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9') +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cv1 +POSTHOOK: Lineage: cv1.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: cv1.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe extended cv1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@cv1 +POSTHOOK: query: describe extended cv1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@cv1 +keystring +value string + + A masked pattern was here +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9'), viewExpandedText:select `b`.`key`, `b`.`value` +from `default`.`src` `b` +where exists + (select `a`.`key` + from `default`.`src` `a` + where `b`.`value` = `a`.`value` and `a`.`key` = `b`.`key` and `a`.`value` > 'val_9'), tableType:VIRTUAL_VIEW, rewriteEnabled:false) +PREHOOK: query: select * +from cv1 where cv1.key in (select key from cv1 c where c.key > '95') +PREHOOK: type: QUERY +PREHOOK: Input: default@cv1 +PREHOOK: Input: default@src + A masked pattern was here +POSTHOOK: query: select * +from cv1 where cv1.key in (select key from cv1 c where c.key > '95') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cv1 +POSTHOOK: Input: default@src + A masked pattern was here +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: create view cv2 as +select * +from src b +where b.key not in + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_11' + ) +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: 
default@cv2 +POSTHOOK: query: create view cv2 as +select * +from src b +where b.key not in + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_11' + ) +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cv2 +POSTHOOK: Lineage: cv2.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: cv2.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: describe extended cv2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@cv2 +POSTHOOK: query: describe extended cv2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@cv2 +keystring +value string + + A mask
[2/8] hive git commit: HIVE-17756: Enable subquery related Qtests for Hive on Spark (Dapeng via Xuefu)
http://git-wip-us.apache.org/repos/asf/hive/blob/8eaf18d5/ql/src/test/results/clientpositive/spark/subquery_select.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/subquery_select.q.out b/ql/src/test/results/clientpositive/spark/subquery_select.q.out new file mode 100644 index 000..c3f3d58 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/subquery_select.q.out @@ -0,0 +1,5379 @@ +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: explain SELECT p_size, p_size IN ( +SELECT MAX(p_size) FROM part) +FROM part +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT p_size, p_size IN ( +SELECT MAX(p_size) FROM part) +FROM part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Spark + Edges: +Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) +Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +Reducer 5 <- Map 4 (GROUP, 1) +Reducer 7 <- Map 4 (GROUP, 1) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: p_size (type: int) +outputColumnNames: _col0 +Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) +Map 4 +Map Operator Tree: +TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: p_size (type: int) +outputColumnNames: p_size +Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE +Group By Operator + aggregations: max(p_size) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +sort order: +Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE +value expressions: _col0 (type: int) +Reducer 2 +Reduce Operator Tree: + Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 + 1 +outputColumnNames: _col0, _col1, _col2 +Statistics: Num rows: 26 Data size: 3589 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3589 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) +Reducer 3 +Reduce Operator Tree: + Join Operator +condition map: + Left Outer Join 0 to 1 +keys: + 0 _col0 (type: int) + 1 _col0 (type: int) +outputColumnNames: _col0, _col1, _col2, _col4 +Statistics: Num rows: 28 Data size: 3947 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: _col0 (type: int), CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (null) ELSE (false) END (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 3947 Basic stats: COMPLETE Column stats: NONE + File Output Operator +compressed: false +Statistics: Num rows: 28 Data size: 3947 Basic stats: COMPLETE Column stats: NONE +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hado
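The CASE WHEN chain in the Select Operator above is Hive's encoding of SQL's three-valued semantics for "x IN (subquery)": _col1 carries the subquery row count, _col2 the count of its non-null values, and _col4 the match flag produced by the left outer join. The same decision logic in plain Java, with illustrative names rather than Hive's internal columns:

    // Three-valued logic behind the CASE WHEN rewrite of "x IN (subquery)".
    // Unknown is modeled as a null java.lang.Boolean; names are illustrative.
    public class NullAwareIn {

      static Boolean in(Integer x, long count, long nonNullCount, boolean matched) {
        if (count == 0) return false;          // empty subquery: always false
        if (matched) return true;              // the join found an equal value
        if (x == null) return null;            // null probe vs non-empty set: unknown
        if (nonNullCount < count) return null; // the set contains nulls: unknown
        return false;                          // definite non-match
      }

      public static void main(String[] args) {
        System.out.println(in(5, 0, 0, false));    // false
        System.out.println(in(5, 3, 3, true));     // true
        System.out.println(in(null, 3, 3, false)); // null
        System.out.println(in(5, 3, 2, false));    // null (a null hides in the set)
        System.out.println(in(5, 3, 3, false));    // false
      }
    }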
hive git commit: HIVE-17401: Hive session idle timeout doesn't function properly (Reviewed by Peter Vary)
Repository: hive
Updated Branches:
  refs/heads/master 9a5381cb9 -> 660e39e03

HIVE-17401: Hive session idle timeout doesn't function properly (Reviewed by Peter Vary)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/660e39e0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/660e39e0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/660e39e0

Branch: refs/heads/master
Commit: 660e39e03b68f6d256a6d4fd41193503a1f711c9
Parents: 9a5381c
Author: Xuefu Zhang
Authored: Wed Sep 6 10:06:01 2017 -0700
Committer: Xuefu Zhang
Committed: Wed Sep 6 10:06:01 2017 -0700

----------------------------------------------------------------------
 .../service/cli/session/HiveSessionImpl.java | 26
 1 file changed, 10 insertions(+), 16 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/660e39e0/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java b/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java
index 57bb53c..906565c 100644
--- a/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java
+++ b/service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java
@@ -111,9 +111,8 @@ public class HiveSessionImpl implements HiveSession {
   // TODO: the control flow for this needs to be defined. Hive is supposed to be thread-local.
   private Hive sessionHive;

-  private volatile long lastAccessTime;
-  private volatile long lastIdleTime;
-  private volatile int activeCalls = 0;
+  private volatile long lastAccessTime = System.currentTimeMillis();
+  private volatile boolean lockedByUser;

   private final Semaphore operationLock;

@@ -184,7 +183,6 @@ public class HiveSessionImpl implements HiveSession {
       configureSession(sessionConfMap);
     }
     lastAccessTime = System.currentTimeMillis();
-    lastIdleTime = lastAccessTime;
   }

   /**
@@ -384,12 +382,11 @@ public class HiveSessionImpl implements HiveSession {
     sessionState.setIsUsingThriftJDBCBinarySerDe(updateIsUsingThriftJDBCBinarySerDe());
     if (userAccess) {
       lastAccessTime = System.currentTimeMillis();
+      lockedByUser = true;
     }
     // set the thread name with the logging prefix.
     sessionState.updateThreadName();
     Hive.set(sessionHive);
-    activeCalls++;
-    lastIdleTime = 0;
   }

   /**
@@ -424,12 +421,7 @@ public class HiveSessionImpl implements HiveSession {
     }
     if (userAccess) {
       lastAccessTime = System.currentTimeMillis();
-    }
-    activeCalls--;
-    // lastIdleTime is only set by the last one
-    // who calls release with empty opHandleSet.
-    if (activeCalls == 0 && opHandleSet.isEmpty()) {
-      lastIdleTime = System.currentTimeMillis();
+      lockedByUser = false;
     }
   }

@@ -830,16 +822,18 @@ public class HiveSessionImpl implements HiveSession {

   @Override
   public long getNoOperationTime() {
-    return lastIdleTime > 0 ? System.currentTimeMillis() - lastIdleTime : 0;
+    boolean noMoreOpHandle = false;
+    synchronized (opHandleSet) {
+      noMoreOpHandle = opHandleSet.isEmpty();
+    }
+    return noMoreOpHandle && !lockedByUser ? System.currentTimeMillis() - lastAccessTime : 0;
   }

   private void closeTimedOutOperations(List operations) {
     acquire(false, false);
     try {
       for (Operation operation : operations) {
-        synchronized (opHandleSet) {
-          opHandleSet.remove(operation.getHandle());
-        }
+        removeOpHandle(operation.getHandle());
         try {
           operation.close();
         } catch (Exception e) {
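The patch replaces the activeCalls/lastIdleTime bookkeeping, which could leave lastIdleTime stale, with just lastAccessTime plus a lockedByUser flag, and derives the idle duration on demand. A condensed sketch of the resulting logic; the two fields and getNoOperationTime mirror the diff above, while the rest of the class is a stand-in:

    import java.util.HashSet;
    import java.util.Set;

    // Condensed sketch of the idle-time logic after HIVE-17401.
    public class SessionIdleSketch {
      private volatile long lastAccessTime = System.currentTimeMillis();
      private volatile boolean lockedByUser;
      private final Set<Object> opHandleSet = new HashSet<>();

      void acquire(boolean userAccess) {
        if (userAccess) {
          lastAccessTime = System.currentTimeMillis();
          lockedByUser = true;  // a user call is in flight
        }
      }

      void release(boolean userAccess) {
        if (userAccess) {
          lastAccessTime = System.currentTimeMillis();
          lockedByUser = false; // user call finished; idle clock restarts here
        }
      }

      /** 0 while busy; otherwise milliseconds since the last user access. */
      long getNoOperationTime() {
        boolean noMoreOpHandle;
        synchronized (opHandleSet) {
          noMoreOpHandle = opHandleSet.isEmpty();
        }
        return noMoreOpHandle && !lockedByUser
            ? System.currentTimeMillis() - lastAccessTime : 0;
      }
    }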
hive git commit: HIVE-16961: Hive on Spark leaks spark application in case user cancels query and closes session (reviewed by Rui)
Repository: hive
Updated Branches:
  refs/heads/master 26f1bdeb4 -> 0731dab18

HIVE-16961: Hive on Spark leaks spark application in case user cancels query and closes session (reviewed by Rui)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0731dab1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0731dab1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0731dab1

Branch: refs/heads/master
Commit: 0731dab18c85363d4bad8a556c437a587277143c
Parents: 26f1bde
Author: Xuefu Zhang
Authored: Wed Jul 5 10:33:18 2017 -0700
Committer: Xuefu Zhang
Committed: Wed Jul 5 10:33:18 2017 -0700

----------------------------------------------------------------------
 .../org/apache/hive/spark/client/SparkClientImpl.java | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/0731dab1/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
----------------------------------------------------------------------
diff --git a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
index bf7e8db..03e773a 100644
--- a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
+++ b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
@@ -508,16 +508,19 @@ class SparkClientImpl implements SparkClient {
             }
           }

-          rpcServer.cancelClient(clientId,
-              "Child process exited before connecting back with error log " + errStr.toString());
           LOG.warn("Child process exited with code {}", exitCode);
+          rpcServer.cancelClient(clientId,
+              "Child process (spark-submit) exited before connecting back with error log " + errStr.toString());
         }
       } catch (InterruptedException ie) {
-        LOG.warn("Waiting thread interrupted, killing child process.");
+        LOG.warn("Thread waiting on the child process (spark-submit) is interrupted, killing the child process.");
+        rpcServer.cancelClient(clientId, "Thread waiting on the child porcess (spark-submit) is interrupted");
         Thread.interrupted();
         child.destroy();
       } catch (Exception e) {
-        LOG.warn("Exception while waiting for child process.", e);
+        String errMsg = "Exception while waiting for child process (spark-submit)";
+        LOG.warn(errMsg, e);
+        rpcServer.cancelClient(clientId, errMsg);
       }
     }
   };
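The fix makes every abnormal exit path of the spark-submit child, error exit, interruption, and unexpected exception, call rpcServer.cancelClient, so the server side never keeps waiting for a driver that will not connect back (which is what leaked the application). The pattern in isolation; RpcServer here is a stand-in interface, not Hive's class:

    // Pattern from HIVE-16961: notify the RPC server on *every* failure path
    // of the child process, not just the error-exit one.
    interface RpcServer { void cancelClient(String clientId, String reason); }

    class ChildWatcher implements Runnable {
      private final Process child;
      private final RpcServer rpcServer;
      private final String clientId;

      ChildWatcher(Process child, RpcServer rpcServer, String clientId) {
        this.child = child; this.rpcServer = rpcServer; this.clientId = clientId;
      }

      @Override
      public void run() {
        try {
          int exitCode = child.waitFor();
          if (exitCode != 0) {
            // child died before connecting back: tell the server to stop waiting
            rpcServer.cancelClient(clientId,
                "Child process (spark-submit) exited before connecting back");
          }
        } catch (InterruptedException ie) {
          rpcServer.cancelClient(clientId,
              "Thread waiting on the child process (spark-submit) is interrupted");
          Thread.interrupted(); // clear the flag before killing the child, as the patch does
          child.destroy();
        } catch (Exception e) {
          rpcServer.cancelClient(clientId,
              "Exception while waiting for child process (spark-submit)");
        }
      }
    }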
hive git commit: HIVE-16962: Better error msg for Hive on Spark in case user cancels query and closes session (reviewed by Chao)
Repository: hive
Updated Branches:
  refs/heads/master 539896482 -> 10944ee34

HIVE-16962: Better error msg for Hive on Spark in case user cancels query and closes session (reviewed by Chao)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/10944ee3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/10944ee3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/10944ee3

Branch: refs/heads/master
Commit: 10944ee34a39efc0503ca917d1153751e1d495d2
Parents: 5398964
Author: Xuefu Zhang
Authored: Thu Jun 29 10:01:05 2017 -0700
Committer: Xuefu Zhang
Committed: Thu Jun 29 10:01:05 2017 -0700

----------------------------------------------------------------------
 .../hive/ql/exec/spark/session/SparkSessionImpl.java  |  9 -
 .../org/apache/hive/spark/client/SparkClientImpl.java | 14 ++
 2 files changed, 18 insertions(+), 5 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/10944ee3/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java
index 51c6715..8224ef9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionImpl.java
@@ -56,13 +56,18 @@ public class SparkSessionImpl implements SparkSession {

   @Override
   public void open(HiveConf conf) throws HiveException {
+    LOG.info("Trying to open Spark session {}", sessionId);
     this.conf = conf;
     isOpen = true;
     try {
       hiveSparkClient = HiveSparkClientFactory.createHiveSparkClient(conf);
     } catch (Throwable e) {
-      throw new HiveException("Failed to create spark client.", e);
+      // It's possible that user session is closed while creating Spark client.
+      String msg = isOpen ? "Failed to create Spark client for Spark session " + sessionId :
+        "Spark Session " + sessionId + " is closed before Spark client is created";
+      throw new HiveException(msg, e);
     }
+    LOG.info("Spark session {} is successfully opened", sessionId);
   }

   @Override
@@ -121,10 +126,12 @@ public class SparkSessionImpl implements SparkSession {

   @Override
   public void close() {
+    LOG.info("Trying to close Spark session {}", sessionId);
     isOpen = false;
     if (hiveSparkClient != null) {
       try {
         hiveSparkClient.close();
+        LOG.info("Spark session {} is successfully closed", sessionId);
         cleanScratchDir();
       } catch (IOException e) {
         LOG.error("Failed to close spark session (" + sessionId + ").", e);

http://git-wip-us.apache.org/repos/asf/hive/blob/10944ee3/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
----------------------------------------------------------------------
diff --git a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
index e40aa6b..bf7e8db 100644
--- a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
+++ b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
@@ -107,19 +107,25 @@ class SparkClientImpl implements SparkClient {
       // The RPC server will take care of timeouts here.
       this.driverRpc = rpcServer.registerClient(clientId, secret, protocol).get();
     } catch (Throwable e) {
+      String errorMsg = null;
       if (e.getCause() instanceof TimeoutException) {
-        LOG.error("Timed out waiting for client to connect.\nPossible reasons include network " +
+        errorMsg = "Timed out waiting for client to connect.\nPossible reasons include network " +
             "issues, errors in remote driver or the cluster has no available resources, etc." +
-            "\nPlease check YARN or Spark driver's logs for further information.", e);
+            "\nPlease check YARN or Spark driver's logs for further information.";
+      } else if (e.getCause() instanceof InterruptedException) {
+        errorMsg = "Interruption occurred while waiting for client to connect.\nPossibly the Spark session is closed " +
+            "such as in case of query cancellation." +
+            "\nPlease refer to HiveServer2 logs for further information.";
       } else {
-        LOG.error("Error while waiting for client to connect.", e);
+        errorMsg = "Error while waiting for client to connect.";
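On the client side, the patch selects the error message by inspecting the cause of the registerClient failure, adding an InterruptedException branch for the cancelled-session case. A sketch of that classification; the messages are abbreviated and the method is a stand-in, not Hive's API:

    import java.util.concurrent.TimeoutException;

    // Sketch of the cause-based message selection from HIVE-16962.
    public class ConnectErrorMessages {
      static String messageFor(Throwable e) {
        Throwable cause = e.getCause();
        if (cause instanceof TimeoutException) {
          return "Timed out waiting for client to connect. Possible reasons include "
              + "network issues, errors in the remote driver, or no available cluster resources.";
        } else if (cause instanceof InterruptedException) {
          return "Interruption occurred while waiting for client to connect. "
              + "Possibly the Spark session was closed, e.g. on query cancellation.";
        }
        return "Error while waiting for client to connect.";
      }

      public static void main(String[] args) {
        // a cancelled session typically surfaces as an InterruptedException cause
        Exception e = new RuntimeException(new InterruptedException());
        System.out.println(messageFor(e));
      }
    }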
[1/2] hive git commit: HIVE-16799: Control the max number of task for a stage in a spark job (Reviewed by Rui)
Repository: hive
Updated Branches:
  refs/heads/master 690a9f8e2 -> 788d486e8

HIVE-16799: Control the max number of task for a stage in a spark job (Reviewed by Rui)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/363ffe0a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/363ffe0a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/363ffe0a

Branch: refs/heads/master
Commit: 363ffe0ac7dec7e4804c1eb2ba76cb07660ae020
Parents: b560f49
Author: Xuefu Zhang
Authored: Fri Jun 2 11:26:33 2017 -0700
Committer: Xuefu Zhang
Committed: Fri Jun 2 11:26:33 2017 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java    |  2 +
 .../test/resources/testconfiguration.properties  |  3 +-
 .../hadoop/hive/cli/control/CliConfigs.java      |  1 +
 .../hadoop/hive/ql/exec/spark/SparkTask.java     |  2 +-
 .../spark/status/RemoteSparkJobMonitor.java      | 14
 .../ql/exec/spark/status/SparkJobMonitor.java    | 12 +++
 .../clientnegative/spark_stage_max_tasks.q       |  6 ++
 .../spark/spark_stage_max_tasks.q.out            | 77
 8 files changed, 115 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/363ffe0a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 176d36f..fce8db3 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3371,6 +3371,8 @@ public class HiveConf extends Configuration {
         "Turn this off when there is a memory issue."),
     SPARK_JOB_MAX_TASKS("hive.spark.job.max.tasks", -1, "The maximum number of tasks a Spark job may have.\n" +
         "If a Spark job contains more tasks than the maximum, it will be cancelled. A value of -1 means no limit."),
+    SPARK_STAGE_MAX_TASKS("hive.spark.stage.max.tasks", -1, "The maximum number of tasks a stage in a Spark job may have.\n" +
+        "If a Spark job stage contains more tasks than the maximum, the job will be cancelled. A value of -1 means no limit."),
     NWAYJOINREORDER("hive.reorder.nway.joins", true,
         "Runs reordering of tables within single n-way join (i.e.: picks streamtable)"),
     HIVE_MERGE_NWAY_JOINS("hive.merge.nway.joins", true,

http://git-wip-us.apache.org/repos/asf/hive/blob/363ffe0a/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index e613374..62462bd 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1459,5 +1459,6 @@ spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\
   groupby3_map_skew_multi_distinct.q,\
   groupby3_multi_distinct.q,\
   groupby_grouping_sets7.q,\
-  spark_job_max_tasks.q
+  spark_job_max_tasks.q,\
+  spark_stage_max_tasks.q

http://git-wip-us.apache.org/repos/asf/hive/blob/363ffe0a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
----------------------------------------------------------------------
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
index 1457db0..27b87fb 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
@@ -288,6 +288,7 @@ public class CliConfigs {
         excludesFrom(testConfigProps, "minimr.query.negative.files");
         excludeQuery("authorization_uri_import.q");
         excludeQuery("spark_job_max_tasks.q");
+        excludeQuery("spark_stage_max_tasks.q");

         setResultsDir("ql/src/test/results/clientnegative");
         setLogDir("itests/qtest/target/qfile-results/clientnegative");

http://git-wip-us.apache.org/repos/asf/hive/blob/363ffe0a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask
[2/2] hive git commit: Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/hive
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/hive

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/788d486e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/788d486e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/788d486e

Branch: refs/heads/master
Commit: 788d486e8fbf58919d04f15e965050f1e885093f
Parents: 363ffe0 690a9f8
Author: Xuefu Zhang
Authored: Fri Jun 2 11:27:38 2017 -0700
Committer: Xuefu Zhang
Committed: Fri Jun 2 11:27:38 2017 -0700

----------------------------------------------------------------------
 bin/ext/version.sh                               |  2 +-
 bin/hive                                         | 20 ++--
 .../hive/http/Log4j2ConfiguratorServlet.java     | 18 +++---
 .../hadoop/hive/metastore/HiveAlterHandler.java  |  4 ++--
 .../hadoop/hive/metastore/MetaStoreUtils.java    | 14 +++---
 .../hive/metastore/TestMetaStoreUtils.java       | 16 +---
 .../fast/VectorMapJoinFastHashTable.java         |  3 ++-
 7 files changed, 38 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
hive git commit: HIVE-16456: Kill spark job when InterruptedException happens or driverContext.isShutdown is true (Zhihai via Xuefu)
Repository: hive
Updated Branches:
  refs/heads/master 067d953bf -> 4ba48aa5f

HIVE-16456: Kill spark job when InterruptedException happens or driverContext.isShutdown is true (Zhihai via Xuefu)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4ba48aa5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4ba48aa5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4ba48aa5

Branch: refs/heads/master
Commit: 4ba48aa5fcaa981ee469161bbf17611aa0392fd2
Parents: 067d953
Author: Xuefu Zhang
Authored: Tue May 9 09:40:13 2017 -0700
Committer: Xuefu Zhang
Committed: Tue May 9 09:40:13 2017 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/spark/SparkTask.java | 32 +---
 1 file changed, 28 insertions(+), 4 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/4ba48aa5/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
index 98b1605..b4fb49f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
@@ -85,6 +85,7 @@ public class SparkTask extends Task {
   private transient List stageIds;
   private transient SparkJobRef jobRef = null;
   private transient boolean isShutdown = false;
+  private transient boolean jobKilled = false;

   @Override
   public void initialize(QueryState queryState, QueryPlan queryPlan, DriverContext driverContext,
@@ -112,6 +113,11 @@ public class SparkTask extends Task {
       jobRef = sparkSession.submit(driverContext, sparkWork);
       perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_SUBMIT_JOB);

+      if (driverContext.isShutdown()) {
+        killJob();
+        throw new HiveException("Operation is cancelled.");
+      }
+
       addToHistory(jobRef);
       sparkJobID = jobRef.getJobId();
       this.jobID = jobRef.getSparkJobStatus().getAppID();
@@ -130,11 +136,11 @@ public class SparkTask extends Task {
         // ideally also cancel the app request here. But w/o facilities from Spark or YARN,
         // it's difficult to do it on hive side alone. See HIVE-12650.
         LOG.info("Failed to submit Spark job " + sparkJobID);
-        jobRef.cancelJob();
+        killJob();
       } else if (rc == 4) {
         LOG.info("The number of tasks reaches above the limit " + conf.getIntVar(HiveConf.ConfVars.SPARK_JOB_MAX_TASKS) +
             ". Cancelling Spark job " + sparkJobID + " with application ID " + jobID );
-        jobRef.cancelJob();
+        killJob();
       }

       if (this.jobID == null) {
@@ -305,14 +311,27 @@ public class SparkTask extends Task {
   @Override
   public void shutdown() {
     super.shutdown();
-    if (jobRef != null && !isShutdown) {
+    killJob();
+    isShutdown = true;
+  }
+
+  private void killJob() {
+    boolean needToKillJob = false;
+    if (jobRef != null && !jobKilled) {
+      synchronized (this) {
+        if (!jobKilled) {
+          jobKilled = true;
+          needToKillJob = true;
+        }
+      }
+    }
+    if (needToKillJob) {
       try {
         jobRef.cancelJob();
       } catch (Exception e) {
         LOG.warn("failed to kill job", e);
       }
     }
-    isShutdown = true;
   }

   /**
@@ -393,6 +412,11 @@ public class SparkTask extends Task {
     if (rc != 0) {
       Throwable error = sparkJobStatus.getError();
       if (error != null) {
+        if ((error instanceof InterruptedException) ||
+            (error instanceof HiveException &&
+            error.getCause() instanceof InterruptedException)) {
+          killJob();
+        }
         setException(error);
       }
     }
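killJob() can race between shutdown(), submission failure, and monitor-driven cancellation, so the patch guards the cancellation behind a jobKilled flag that is re-checked under a lock, guaranteeing cancelJob() runs at most once while keeping the actual cancel call outside the lock. The pattern in isolation; JobRef stands in for Hive's SparkJobRef:

    // The run-at-most-once cancellation pattern from HIVE-16456 in isolation.
    interface JobRef { void cancelJob() throws Exception; }

    class CancellableTask {
      private volatile JobRef jobRef;
      private boolean jobKilled = false;

      void killJob() {
        boolean needToKillJob = false;
        if (jobRef != null && !jobKilled) {
          synchronized (this) {
            if (!jobKilled) {     // re-check under the lock
              jobKilled = true;   // claim the kill before performing it
              needToKillJob = true;
            }
          }
        }
        if (needToKillJob) {      // cancel outside the lock
          try {
            jobRef.cancelJob();
          } catch (Exception e) {
            // log and move on; the job may already be gone
          }
        }
      }
    }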
hive git commit: HIVE-16552: Limit the number of tasks a Spark job may contain (Reviewed by Rui)
Repository: hive
Updated Branches:
  refs/heads/master 9e9356b5e -> c6b5ad663

HIVE-16552: Limit the number of tasks a Spark job may contain (Reviewed by Rui)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c6b5ad66
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c6b5ad66
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c6b5ad66

Branch: refs/heads/master
Commit: c6b5ad663d235c15fc5bb5a24a1d3e9ac0d05140
Parents: 9e9356b
Author: Xuefu Zhang
Authored: Thu May 4 09:31:28 2017 -0700
Committer: Xuefu Zhang
Committed: Thu May 4 09:31:28 2017 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java    |  2 +
 .../test/resources/testconfiguration.properties  |  4 +-
 .../hadoop/hive/cli/control/CliConfigs.java      |  1 +
 .../hadoop/hive/ql/exec/spark/SparkTask.java     |  6 ++
 .../spark/status/RemoteSparkJobMonitor.java      | 15 +++-
 .../ql/exec/spark/status/SparkJobMonitor.java    | 10 ++-
 .../clientnegative/spark_job_max_tasks.q         |  6 ++
 .../spark/spark_job_max_tasks.q.out              | 77
 8 files changed, 118 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/c6b5ad66/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 84398c6..99c26ce 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3350,6 +3350,8 @@ public class HiveConf extends Configuration {
         "hive.spark.use.groupby.shuffle", true,
         "Spark groupByKey transformation has better performance but uses unbounded memory." +
         "Turn this off when there is a memory issue."),
+    SPARK_JOB_MAX_TASKS("hive.spark.job.max.tasks", -1, "The maximum number of tasks a Spark job may have.\n" +
+        "If a Spark job contains more tasks than the maximum, it will be cancelled. A value of -1 means no limit."),
     NWAYJOINREORDER("hive.reorder.nway.joins", true,
         "Runs reordering of tables within single n-way join (i.e.: picks streamtable)"),
     HIVE_MERGE_NWAY_JOINS("hive.merge.nway.joins", true,

http://git-wip-us.apache.org/repos/asf/hive/blob/c6b5ad66/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 753f3a9..5ab3076 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1445,4 +1445,6 @@ spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\
   groupby2_multi_distinct.q,\
   groupby3_map_skew_multi_distinct.q,\
   groupby3_multi_distinct.q,\
-  groupby_grouping_sets7.q
+  groupby_grouping_sets7.q,\
+  spark_job_max_tasks.q

http://git-wip-us.apache.org/repos/asf/hive/blob/c6b5ad66/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
----------------------------------------------------------------------
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
index 67064b8..1457db0 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
@@ -287,6 +287,7 @@ public class CliConfigs {

         excludesFrom(testConfigProps, "minimr.query.negative.files");
         excludeQuery("authorization_uri_import.q");
+        excludeQuery("spark_job_max_tasks.q");

         setResultsDir("ql/src/test/results/clientnegative");
         setLogDir("itests/qtest/target/qfile-results/clientnegative");

http://git-wip-us.apache.org/repos/asf/hive/blob/c6b5ad66/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
index 32a7730..98b1605 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
@@ -129,8 +129,14 @@ public class SparkTask extends Task {
       // TODO: If the timeout is because of lack of resources in the cluster, we should
       // ideally a
[1/3] hive git commit: HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)
Repository: hive Updated Branches: refs/heads/master 812fa3946 -> 00b644482 http://git-wip-us.apache.org/repos/asf/hive/blob/00b64448/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out new file mode 100644 index 000..ca0910a --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out @@ -0,0 +1,5921 @@ +PREHOOK: query: explain create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +PREHOOK: type: CREATETABLE +POSTHOOK: query: explain create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +POSTHOOK: type: CREATETABLE +Stage-0 + Create Table Operator: +name:default.src_orc_merge_test_part + +PREHOOK: query: create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: query: create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_orc_merge_test_part +PREHOOK: query: alter table src_orc_merge_test_part add partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: query: alter table src_orc_merge_test_part add partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: Output: default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +PREHOOK: query: desc extended src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_orc_merge_test_part +POSTHOOK: query: desc extended src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_orc_merge_test_part +keyint +value string +ds string +ts string + +# Partition Information +# col_name data_type comment + +ds string +ts string + + A masked pattern was here +PREHOOK: query: explain insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Stage-2 + Stats-Aggr Operator +Stage-0 + Move Operator +table:{"name:":"default.src_orc_merge_test_part"} +Stage-1 + Map 1 + File Output Operator [FS_3] +table:{"name:":"default.src_orc_merge_test_part"} +Select Operator [SEL_1] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=10) +default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + +PREHOOK: query: insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +POSTHOOK: query: insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +POSTHOOK: Lineage: src_orc_merge_test_part PARTITION(ds=2012-01-03,ts=2012-01-03+14:46:31).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test_part PARTITION(ds=2012-01-03,ts=2012-01-03+14:46:31).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain insert into table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-
[3/3] hive git commit: HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)
HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/00b64448 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/00b64448 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/00b64448 Branch: refs/heads/master Commit: 00b644482656da9fb40788744e692f4e677b4c0d Parents: 812fa39 Author: Xuefu Zhang Authored: Tue May 2 10:28:37 2017 -0700 Committer: Xuefu Zhang Committed: Tue May 2 10:28:37 2017 -0700 -- .../hive/common/jsonexplain/Connection.java | 35 + .../hive/common/jsonexplain/DagJsonParser.java | 167 + .../common/jsonexplain/DagJsonParserUtils.java | 53 + .../common/jsonexplain/JsonParserFactory.java |4 + .../hadoop/hive/common/jsonexplain/Op.java | 358 ++ .../hadoop/hive/common/jsonexplain/Printer.java | 41 + .../hadoop/hive/common/jsonexplain/Stage.java | 262 + .../hadoop/hive/common/jsonexplain/Vertex.java | 323 + .../jsonexplain/spark/SparkJsonParser.java | 35 + .../hive/common/jsonexplain/tez/Connection.java | 35 - .../hadoop/hive/common/jsonexplain/tez/Op.java | 356 -- .../hive/common/jsonexplain/tez/Printer.java| 41 - .../hive/common/jsonexplain/tez/Stage.java | 262 - .../common/jsonexplain/tez/TezJsonParser.java | 153 +- .../jsonexplain/tez/TezJsonParserUtils.java | 53 - .../hive/common/jsonexplain/tez/Vertex.java | 334 - .../org/apache/hadoop/hive/conf/HiveConf.java |5 +- .../test/resources/testconfiguration.properties |1 + .../hadoop/hive/ql/optimizer/Optimizer.java |2 +- .../hive/ql/parse/ExplainSemanticAnalyzer.java | 16 +- .../apache/hadoop/hive/ql/plan/SparkWork.java | 10 +- .../clientpositive/spark_explainuser_1.q| 671 ++ .../spark/spark_explainuser_1.q.out | 5921 ++ 23 files changed, 7915 insertions(+), 1223 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/00b64448/common/src/java/org/apache/hadoop/hive/common/jsonexplain/Connection.java -- diff --git a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/Connection.java b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/Connection.java new file mode 100644 index 000..0df6f4c --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/Connection.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.common.jsonexplain; + +public final class Connection implements Comparable<Connection> { + public final String type; + public final Vertex from; + + public Connection(String type, Vertex from) { +super(); +this.type = type; +this.from = from; + } + + @Override + public int compareTo(Connection o) { +return from.compareTo(o.from); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/00b64448/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java -- diff --git a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java new file mode 100644 index 000..1f01685 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licen
[2/3] hive git commit: HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)
http://git-wip-us.apache.org/repos/asf/hive/blob/00b64448/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java -- diff --git a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java index ea86048..294dc6b 100644 --- a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java +++ b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java @@ -18,146 +18,29 @@ package org.apache.hadoop.hive.common.jsonexplain.tez; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; +import org.apache.hadoop.hive.common.jsonexplain.DagJsonParser; -import org.apache.hadoop.hive.common.jsonexplain.JsonParser; -import org.json.JSONObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -public final class TezJsonParser implements JsonParser { - public final Map stages = new LinkedHashMap<>(); - protected final Logger LOG; - // the objects that have been printed. - public final Set printSet = new LinkedHashSet<>(); - // the vertex that should be inlined. - public final Map> inlineMap = new LinkedHashMap<>(); - - public TezJsonParser() { -super(); -LOG = LoggerFactory.getLogger(this.getClass().getName()); - } - - public void extractStagesAndPlans(JSONObject inputObject) throws Exception { -// extract stages -JSONObject dependency = inputObject.getJSONObject("STAGE DEPENDENCIES"); -if (dependency != null && dependency.length() > 0) { - // iterate for the first time to get all the names of stages. - for (String stageName : JSONObject.getNames(dependency)) { -this.stages.put(stageName, new Stage(stageName, this)); - } - // iterate for the second time to get all the dependency. 
- for (String stageName : JSONObject.getNames(dependency)) { -JSONObject dependentStageNames = dependency.getJSONObject(stageName); -this.stages.get(stageName).addDependency(dependentStageNames, this.stages); - } -} -// extract stage plans -JSONObject stagePlans = inputObject.getJSONObject("STAGE PLANS"); -if (stagePlans != null && stagePlans.length() > 0) { - for (String stageName : JSONObject.getNames(stagePlans)) { -JSONObject stagePlan = stagePlans.getJSONObject(stageName); -this.stages.get(stageName).extractVertex(stagePlan); - } -} - } - - /** - * @param indentFlag - * help to generate correct indent - * @return - */ - public static String prefixString(int indentFlag) { -StringBuilder sb = new StringBuilder(); -for (int index = 0; index < indentFlag; index++) { - sb.append(" "); -} -return sb.toString(); - } - - /** - * @param indentFlag - * @param tail - * help to generate correct indent with a specific tail - * @return - */ - public static String prefixString(int indentFlag, String tail) { -StringBuilder sb = new StringBuilder(); -for (int index = 0; index < indentFlag; index++) { - sb.append(" "); -} -int len = sb.length(); -return sb.replace(len - tail.length(), len, tail).toString(); - } +public class TezJsonParser extends DagJsonParser { @Override - public void print(JSONObject inputObject, PrintStream outputStream) throws Exception { -LOG.info("JsonParser is parsing:" + inputObject.toString()); -this.extractStagesAndPlans(inputObject); -Printer printer = new Printer(); -// print out the cbo info -if (inputObject.has("cboInfo")) { - printer.println(inputObject.getString("cboInfo")); - printer.println(); -} -// print out the vertex dependency in root stage -for (Stage candidate : this.stages.values()) { - if (candidate.tezStageDependency != null && candidate.tezStageDependency.size() > 0) { -printer.println("Vertex dependency in root stage"); -for (Entry> entry : candidate.tezStageDependency.entrySet()) { - StringBuilder sb = new StringBuilder(); - sb.append(entry.getKey().name); - sb.append(" <- "); - boolean printcomma = false; - for (Connection connection : entry.getValue()) { -if (printcomma) { - sb.append(", "); -} else { - printcomma = true; -} -sb.append(connection.from.name + " (" + connection.type + ")"); - } - printer.println(sb.toString()); -} -printer.println(); - } + public String mapEdgeType(String edgeName) { +switch (edgeName) { + case "BROADCAST_EDGE": +return "BROADCAST"; + case "SIMPLE_EDGE": +return "SHUFFLE"; + c
hive git commit: HIVE-16524: Remove the redundant item type in hiveserver2.jsp and QueryProfileTmpl.jamon (ZhangBing via Xuefu)
Repository: hive Updated Branches: refs/heads/master 79e3c5a8d -> 5ab03cba5 HIVE-16524: Remove the redundant item type in hiveserver2.jsp and QueryProfileTmpl.jamon (ZhangBing via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5ab03cba Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5ab03cba Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5ab03cba Branch: refs/heads/master Commit: 5ab03cba5999de0c95e24aafad074099231297bc Parents: 79e3c5a Author: Xuefu Zhang Authored: Mon May 1 18:27:53 2017 -0700 Committer: Xuefu Zhang Committed: Mon May 1 18:27:53 2017 -0700 -- .../src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon | 10 +- .../resources/hive-webapps/hiveserver2/hiveserver2.jsp| 8 2 files changed, 9 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/5ab03cba/service/src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon -- diff --git a/service/src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon b/service/src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon index 07aa3c1..fa69eb2 100644 --- a/service/src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon +++ b/service/src/jamon/org/apache/hive/tmpl/QueryProfileTmpl.jamon @@ -169,7 +169,7 @@ org.apache.hive.service.cli.operation.SQLOperationDisplay; <%args> SQLOperationDisplay sod; - + Stage Id Status @@ -218,7 +218,7 @@ org.apache.hive.service.cli.operation.SQLOperationDisplay; Compile-time metadata operations - + Call Name Time (ms) @@ -237,7 +237,7 @@ org.apache.hive.service.cli.operation.SQLOperationDisplay; Execution-time metadata operations - + Call Name Time (ms) @@ -256,7 +256,7 @@ org.apache.hive.service.cli.operation.SQLOperationDisplay; Compile-Time Perf-Logger - + Compile-time Call Name Time (ms) @@ -275,7 +275,7 @@ org.apache.hive.service.cli.operation.SQLOperationDisplay; Execution-Time Perf-Logger - + Execution-time Call Name Time (ms) http://git-wip-us.apache.org/repos/asf/hive/blob/5ab03cba/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp -- diff --git a/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp b/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp index 33797ed..0e0803b 100644 --- a/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp +++ b/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp @@ -98,7 +98,7 @@ if (sessionManager != null) { Active Sessions - + User Name IP Address @@ -128,7 +128,7 @@ for (HiveSession hiveSession: hiveSessions) { Open Queries - + User Name Query @@ -169,7 +169,7 @@ for (HiveSession hiveSession: hiveSessions) { Last Max <%= conf.get(ConfVars.HIVE_SERVER2_WEBUI_MAX_HISTORIC_QUERIES.varname) %> Closed Queries - + User Name Query @@ -213,7 +213,7 @@ for (HiveSession hiveSession: hiveSessions) { Software Attributes - + Attribute Name Value
[2/2] hive git commit: HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)
HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/79e3c5a8 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/79e3c5a8 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/79e3c5a8 Branch: refs/heads/master Commit: 79e3c5a8d10e60ae1a981e74b0c48011d3fb2cdc Parents: 62fbdd8 Author: Xuefu Zhang Authored: Mon May 1 18:16:27 2017 -0700 Committer: Xuefu Zhang Committed: Mon May 1 18:16:27 2017 -0700 -- .../hive/common/jsonexplain/DagJsonParser.java | 167 + .../common/jsonexplain/JsonParserFactory.java |4 + .../jsonexplain/spark/SparkJsonParser.java | 35 + .../hive/common/jsonexplain/tez/Connection.java |2 +- .../hadoop/hive/common/jsonexplain/tez/Op.java | 54 +- .../hive/common/jsonexplain/tez/Printer.java|2 +- .../hive/common/jsonexplain/tez/Stage.java | 20 +- .../common/jsonexplain/tez/TezJsonParser.java | 153 +- .../jsonexplain/tez/TezJsonParserUtils.java |6 +- .../hive/common/jsonexplain/tez/Vertex.java | 87 +- .../org/apache/hadoop/hive/conf/HiveConf.java |5 +- .../test/resources/testconfiguration.properties |1 + .../hadoop/hive/ql/optimizer/Optimizer.java |2 +- .../hive/ql/parse/ExplainSemanticAnalyzer.java | 16 +- .../apache/hadoop/hive/ql/plan/SparkWork.java | 10 +- .../clientpositive/spark_explainuser_1.q| 671 ++ .../spark/spark_explainuser_1.q.out | 5921 ++ 17 files changed, 6924 insertions(+), 232 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/79e3c5a8/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java -- diff --git a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java new file mode 100644 index 000..1f01685 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/DagJsonParser.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.jsonexplain; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.hadoop.hive.common.jsonexplain.JsonParser; +import org.json.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class DagJsonParser implements JsonParser { + public final Map stages = new LinkedHashMap<>(); + protected final Logger LOG; + // the objects that have been printed. + public final Set printSet = new LinkedHashSet<>(); + // the vertex that should be inlined. 
+ public final Map> inlineMap = new LinkedHashMap<>(); + + public DagJsonParser() { +super(); +LOG = LoggerFactory.getLogger(this.getClass().getName()); + } + + public void extractStagesAndPlans(JSONObject inputObject) throws Exception { +// extract stages +JSONObject dependency = inputObject.getJSONObject("STAGE DEPENDENCIES"); +if (dependency != null && dependency.length() > 0) { + // iterate for the first time to get all the names of stages. + for (String stageName : JSONObject.getNames(dependency)) { +this.stages.put(stageName, new Stage(stageName, this)); + } + // iterate for the second time to get all the dependency. + for (String stageName : JSONObject.getNames(dependency)) { +JSONObject dependentStageNames = dependency.getJSONObject(stageName); +this.stages.get(stageName).addDependency(dependentStageNames, this.stages); + } +} +// extract stage plans +JSONObject stagePlans = inputObject.getJSONObject("STAGE PLANS"); +if (stagePlans != null && stagePla
[1/2] hive git commit: HIVE-11133: Support hive.explain.user for Spark (Sahil via Xuefu)
Repository: hive Updated Branches: refs/heads/master 62fbdd86e -> 79e3c5a8d http://git-wip-us.apache.org/repos/asf/hive/blob/79e3c5a8/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out new file mode 100644 index 000..ca0910a --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out @@ -0,0 +1,5921 @@ +PREHOOK: query: explain create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +PREHOOK: type: CREATETABLE +POSTHOOK: query: explain create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +POSTHOOK: type: CREATETABLE +Stage-0 + Create Table Operator: +name:default.src_orc_merge_test_part + +PREHOOK: query: create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: query: create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_orc_merge_test_part +PREHOOK: query: alter table src_orc_merge_test_part add partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: query: alter table src_orc_merge_test_part add partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: Output: default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +PREHOOK: query: desc extended src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_orc_merge_test_part +POSTHOOK: query: desc extended src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_orc_merge_test_part +keyint +value string +ds string +ts string + +# Partition Information +# col_name data_type comment + +ds string +ts string + + A masked pattern was here +PREHOOK: query: explain insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Stage-2 + Stats-Aggr Operator +Stage-0 + Move Operator +table:{"name:":"default.src_orc_merge_test_part"} +Stage-1 + Map 1 + File Output Operator [FS_3] +table:{"name:":"default.src_orc_merge_test_part"} +Select Operator [SEL_1] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=10) +default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + +PREHOOK: query: insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +POSTHOOK: query: insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +POSTHOOK: Lineage: src_orc_merge_test_part PARTITION(ds=2012-01-03,ts=2012-01-03+14:46:31).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test_part PARTITION(ds=2012-01-03,ts=2012-01-03+14:46:31).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain insert into table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-
hive git commit: HIVE-12614: RESET command does not close spark session (Sahil via Xuefu)
Repository: hive Updated Branches: refs/heads/master fb3df4641 -> 1ed36f042 HIVE-12614: RESET command does not close spark session (Sahil via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1ed36f04 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1ed36f04 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1ed36f04 Branch: refs/heads/master Commit: 1ed36f0428d53303d02ddd5c8a3a6c7f8db9e19a Parents: fb3df46 Author: Xuefu Zhang Authored: Tue Apr 25 14:09:35 2017 -0700 Committer: Xuefu Zhang Committed: Tue Apr 25 14:10:39 2017 -0700 -- .../hive/ql/processors/ResetProcessor.java | 21 --- .../hadoop/hive/ql/processors/SetProcessor.java | 15 +++-- .../hive/ql/processors/TestResetProcessor.java | 59 3 files changed, 82 insertions(+), 13 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/1ed36f04/ql/src/java/org/apache/hadoop/hive/ql/processors/ResetProcessor.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/processors/ResetProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/processors/ResetProcessor.java index bbd4501..b40879d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/processors/ResetProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/processors/ResetProcessor.java @@ -23,7 +23,11 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; + import org.apache.commons.lang3.StringUtils; + import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveVariableSource; import org.apache.hadoop.hive.conf.SystemVariables; @@ -33,7 +37,6 @@ import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; import org.apache.hadoop.hive.ql.session.SessionState; -import com.google.common.collect.Lists; public class ResetProcessor implements CommandProcessor { @@ -45,8 +48,11 @@ public class ResetProcessor implements CommandProcessor { @Override public CommandProcessorResponse run(String command) throws CommandNeedRetryException { -SessionState ss = SessionState.get(); +return run(SessionState.get(), command); + } + @VisibleForTesting + CommandProcessorResponse run(SessionState ss, String command) throws CommandNeedRetryException { CommandProcessorResponse authErrResp = CommandUtil.authorizeCommand(ss, HiveOperationType.RESET, Arrays.asList(command)); if (authErrResp != null) { @@ -88,7 +94,7 @@ public class ResetProcessor implements CommandProcessor { ? 
Lists.newArrayList("Resetting " + message + " to default values") : null); } - private void resetOverridesOnly(SessionState ss) { + private static void resetOverridesOnly(SessionState ss) { if (ss.getOverriddenConfigurations().isEmpty()) return; HiveConf conf = new HiveConf(); for (String key : ss.getOverriddenConfigurations().keySet()) { @@ -97,21 +103,20 @@ public class ResetProcessor implements CommandProcessor { ss.getOverriddenConfigurations().clear(); } - private void resetOverrideOnly(SessionState ss, String varname) { + private static void resetOverrideOnly(SessionState ss, String varname) { if (!ss.getOverriddenConfigurations().containsKey(varname)) return; setSessionVariableFromConf(ss, varname, new HiveConf()); ss.getOverriddenConfigurations().remove(varname); } - private void setSessionVariableFromConf(SessionState ss, String varname, - HiveConf conf) { + private static void setSessionVariableFromConf(SessionState ss, String varname, HiveConf conf) { String value = conf.get(varname); if (value != null) { - ss.getConf().set(varname, value); + SetProcessor.setConf(ss, varname, varname, value, false); } } - private CommandProcessorResponse resetToDefault(SessionState ss, String varname) { + private static CommandProcessorResponse resetToDefault(SessionState ss, String varname) { varname = varname.trim(); try { String nonErrorMessage = null; http://git-wip-us.apache.org/repos/asf/hive/blob/1ed36f04/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java index 0ffa182..1458211 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java @@ -209,17 +209,22 @@ public class
hive git commit: HIVE-16430: Add log to show the cancelled query id when cancelOperation is called (Zhihai via Xuefu)
Repository: hive Updated Branches: refs/heads/master 794cfa35a -> 4deefcd50 HIVE-16430: Add log to show the cancelled query id when cancelOperation is called (Zhihai via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4deefcd5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4deefcd5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4deefcd5 Branch: refs/heads/master Commit: 4deefcd50433c29dcf7cb4d1e422097b9cfcbca1 Parents: 794cfa3 Author: Xuefu Zhang Authored: Fri Apr 14 06:59:46 2017 -0700 Committer: Xuefu Zhang Committed: Fri Apr 14 06:59:46 2017 -0700 -- .../apache/hive/service/cli/operation/SQLOperation.java | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/4deefcd5/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java -- diff --git a/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java b/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java index 04fc0a1..d9bfba87 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ b/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java @@ -398,9 +398,11 @@ public class SQLOperation extends ExecuteStatementOperation { Future backgroundHandle = getBackgroundHandle(); if (backgroundHandle != null) { boolean success = backgroundHandle.cancel(true); +String queryId = confOverlay.get(HiveConf.ConfVars.HIVEQUERYID.varname); if (success) { - String queryId = confOverlay.get(HiveConf.ConfVars.HIVEQUERYID.varname); LOG.info("The running operation has been successfully interrupted: " + queryId); +} else if (state == OperationState.CANCELED) { + LOG.info("The running operation could not be cancelled, typically because it has already completed normally: " + queryId); } } } @@ -427,8 +429,16 @@ public class SQLOperation extends ExecuteStatementOperation { @Override public void cancel(OperationState stateAfterCancel) throws HiveSQLException { +String queryId = null; +if (stateAfterCancel == OperationState.CANCELED) { + queryId = confOverlay.get(HiveConf.ConfVars.HIVEQUERYID.varname); + LOG.info("Cancelling the query execution: " + queryId); +} cleanup(stateAfterCancel); cleanupOperationLog(); +if (stateAfterCancel == OperationState.CANCELED) { + LOG.info("Successfully cancelled the query: " + queryId); +} } @Override
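The new else-if branch leans on the contract of Future.cancel(true): it returns false when there is nothing left to interrupt, typically because the background operation has already completed, which is exactly the situation worth logging differently from a successful interruption. The contract in isolation:

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;

    public class CancelDemo {
      public static void main(String[] args) throws Exception {
        ExecutorService pool = Executors.newSingleThreadExecutor();
        Future<Integer> f = pool.submit(() -> 42);
        f.get();                           // wait for normal completion
        boolean success = f.cancel(true);  // false: nothing left to interrupt
        System.out.println("cancelled: " + success);
        pool.shutdown();
      }
    }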
hive git commit: HIVE-16286: Log canceled query id (Jimmy via Xuefu)
Repository: hive Updated Branches: refs/heads/master 736d2e861 -> 8fa9d5833 HIVE-16286: Log canceled query id (Jimmy via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8fa9d583 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8fa9d583 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8fa9d583 Branch: refs/heads/master Commit: 8fa9d5833c5c4fda092a917c9881db8fbfea5332 Parents: 736d2e8 Author: Xuefu Zhang Authored: Thu Mar 23 19:36:16 2017 -0700 Committer: Xuefu Zhang Committed: Thu Mar 23 19:36:16 2017 -0700 -- .../org/apache/hive/service/cli/operation/SQLOperation.java| 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/8fa9d583/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java -- diff --git a/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java b/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java index ff389ac..f41092e 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ b/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java @@ -172,8 +172,9 @@ public class SQLOperation extends ExecuteStatementOperation { @Override public void run() { try { + String queryId = confOverlay.get(HiveConf.ConfVars.HIVEQUERYID.varname); LOG.info("Query timed out after: " + queryTimeout - + " seconds. Cancelling the execution now."); + + " seconds. Cancelling the execution now: " + queryId); SQLOperation.this.cancel(OperationState.TIMEDOUT); } catch (HiveSQLException e) { LOG.error("Error cancelling the query after timeout: " + queryTimeout + " seconds", e); @@ -412,7 +413,8 @@ public class SQLOperation extends ExecuteStatementOperation { if (backgroundHandle != null) { boolean success = backgroundHandle.cancel(true); if (success) { - LOG.info("The running operation has been successfully interrupted."); + String queryId = confOverlay.get(HiveConf.ConfVars.HIVEQUERYID.varname); + LOG.info("The running operation has been successfully interrupted: " + queryId); } } }
[1/2] hive git commit: HIVE-16183: Fix potential thread safety issues with static variables (reviewed by rui, sergey, and peter)
Repository: hive Updated Branches: refs/heads/master 1f7e26ff2 -> 71f4930d9 http://git-wip-us.apache.org/repos/asf/hive/blob/71f4930d/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java -- diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java index 7a565dd..f733c1e 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java +++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java @@ -145,7 +145,6 @@ public class FastHiveDecimalImpl extends FastHiveDecimal { * Int: 8 decimal digits. An even number and 1/2 of MAX_LONGWORD_DECIMAL. */ private static final int INTWORD_DECIMAL_DIGITS = 8; - private static final int MAX_INTWORD_DECIMAL = (int) powerOfTenTable[INTWORD_DECIMAL_DIGITS] - 1; private static final int MULTIPLER_INTWORD_DECIMAL = (int) powerOfTenTable[INTWORD_DECIMAL_DIGITS]; /** @@ -164,9 +163,6 @@ public class FastHiveDecimalImpl extends FastHiveDecimal { private static final long MAX_HIGHWORD_DECIMAL = powerOfTenTable[HIGHWORD_DECIMAL_DIGITS] - 1; - private static long HIGHWORD_DIVIDE_FACTOR = powerOfTenTable[LONGWORD_DECIMAL_DIGITS - HIGHWORD_DECIMAL_DIGITS]; - private static long HIGHWORD_MULTIPLY_FACTOR = powerOfTenTable[HIGHWORD_DECIMAL_DIGITS]; - // 38 * 2 or 76 full decimal maximum - (64 + 8) digits in 4 lower longs (4 digits here). private static final long FULL_MAX_HIGHWORD_DECIMAL = powerOfTenTable[MAX_DECIMAL_DIGITS * 2 - (FOUR_X_LONGWORD_DECIMAL_DIGITS + INTWORD_DECIMAL_DIGITS)] - 1; @@ -189,11 +185,6 @@ public class FastHiveDecimalImpl extends FastHiveDecimal { BigInteger.ONE.add(BIG_INTEGER_MAX_LONGWORD_DECIMAL); private static final BigInteger BIG_INTEGER_LONGWORD_MULTIPLIER_2X = BIG_INTEGER_LONGWORD_MULTIPLIER.multiply(BIG_INTEGER_LONGWORD_MULTIPLIER); - private static final BigInteger BIG_INTEGER_LONGWORD_MULTIPLIER_3X = - BIG_INTEGER_LONGWORD_MULTIPLIER_2X.multiply(BIG_INTEGER_LONGWORD_MULTIPLIER); - private static final BigInteger BIG_INTEGER_LONGWORD_MULTIPLIER_4X = - BIG_INTEGER_LONGWORD_MULTIPLIER_3X.multiply(BIG_INTEGER_LONGWORD_MULTIPLIER); - private static final BigInteger BIG_INTEGER_MAX_HIGHWORD_DECIMAL = BigInteger.valueOf(MAX_HIGHWORD_DECIMAL); private static final BigInteger BIG_INTEGER_HIGHWORD_MULTIPLIER = @@ -203,21 +194,21 @@ public class FastHiveDecimalImpl extends FastHiveDecimal { // conversion. // There is only one blank in UTF-8. - private final static byte BYTE_BLANK = (byte) ' '; + private static final byte BYTE_BLANK = (byte) ' '; - private final static byte BYTE_DIGIT_ZERO = (byte) '0'; - private final static byte BYTE_DIGIT_NINE = (byte) '9'; + private static final byte BYTE_DIGIT_ZERO = (byte) '0'; + private static final byte BYTE_DIGIT_NINE = (byte) '9'; // Decimal point. - private final static byte BYTE_DOT = (byte) '.'; + private static final byte BYTE_DOT = (byte) '.'; // Sign. - private final static byte BYTE_MINUS = (byte) '-'; - private final static byte BYTE_PLUS = (byte) '+'; + private static final byte BYTE_MINUS = (byte) '-'; + private static final byte BYTE_PLUS = (byte) '+'; // Exponent E or e. - private final static byte BYTE_EXPONENT_LOWER = (byte) 'e'; - private final static byte BYTE_EXPONENT_UPPER = (byte) 'E'; + private static final byte BYTE_EXPONENT_LOWER = (byte) 'e'; + private static final byte BYTE_EXPONENT_UPPER = (byte) 'E'; // // Initialize (fastSetFrom*). 
@@ -1758,7 +1749,7 @@ public class FastHiveDecimalImpl extends FastHiveDecimal { * 4,611,686,018,427,387,904 or * 461,1686018427387904 (16 digit comma'd) */ - private static FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_62 = + private static final FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_62 = new FastHiveDecimal(1, 1686018427387904L, 461L, 0, 19, 0); /* @@ -1769,7 +1760,7 @@ public class FastHiveDecimalImpl extends FastHiveDecimal { * 9,223,372,036,854,775,808 or * 922,3372036854775808 (16 digit comma'd) */ - private static FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_63 = + private static final FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_63 = new FastHiveDecimal(1, 3372036854775808L, 922L, 0, 19, 0); /* @@ -1784,7 +1775,7 @@ public class FastHiveDecimalImpl extends FastHiveDecimal { * 42,535,295,865,117,307,932,921,825,928,971,026,432 or * 425352,9586511730793292,1825928971026432 (16 digit comma'd) */ - private static FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_125 = + private static final FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_125 = new Fast
[2/2] hive git commit: HIVE-16183: Fix potential thread safety issues with static variables (reviewed by rui, sergey, and peter)
HIVE-16183: Fix potential thread safety issues with static variables (reviewed by rui, sergey, and peter) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/71f4930d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/71f4930d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/71f4930d Branch: refs/heads/master Commit: 71f4930d95475e7e63b5acc55af3809aefcc71e0 Parents: 1f7e26f Author: Xuefu Zhang Authored: Thu Mar 16 19:20:41 2017 -0700 Committer: Xuefu Zhang Committed: Thu Mar 16 19:20:41 2017 -0700 -- .../org/apache/hive/beeline/BeeLineOpts.java| 2 +- .../apache/hive/beeline/HiveSchemaHelper.java | 12 ++-- .../org/apache/hive/beeline/HiveSchemaTool.java | 2 +- .../org/apache/hadoop/hive/cli/RCFileCat.java | 13 ++-- .../apache/hadoop/hive/cli/TestRCFileCat.java | 4 +- .../org/apache/hadoop/hive/common/LogUtils.java | 5 +- .../hadoop/hive/common/StatsSetupConst.java | 2 +- .../hive/metastore/TestMetastoreVersion.java| 4 +- .../hive/metastore/MetaStoreSchemaInfo.java | 16 ++--- .../hadoop/hive/ql/exec/ArchiveUtils.java | 7 +- .../hadoop/hive/ql/exec/FunctionRegistry.java | 2 +- .../apache/hadoop/hive/ql/exec/Utilities.java | 21 +++--- .../ql/exec/vector/VectorizationContext.java| 2 +- .../exec/vector/expressions/CuckooSetBytes.java | 4 +- .../fast/VectorMapJoinFastHashTable.java| 6 +- .../hadoop/hive/ql/history/HiveHistoryImpl.java | 5 +- .../apache/hadoop/hive/ql/index/HiveIndex.java | 4 +- .../hadoop/hive/ql/io/HiveFileFormatUtils.java | 2 +- .../hadoop/hive/ql/io/HiveInputFormat.java | 3 +- .../org/apache/hadoop/hive/ql/io/RCFile.java| 2 +- .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 6 +- .../ql/io/rcfile/stats/PartialScanTask.java | 6 +- .../hadoop/hive/ql/metadata/VirtualColumn.java | 2 +- .../hive/ql/optimizer/GenMapRedUtils.java | 6 +- .../ListBucketingPrunerUtils.java | 4 +- .../physical/GenMRSkewJoinProcessor.java| 10 +-- .../hive/ql/optimizer/physical/Vectorizer.java | 42 +-- .../ql/optimizer/physical/VectorizerReason.java | 2 +- .../hive/ql/parse/BaseSemanticAnalyzer.java | 8 +-- .../hive/ql/parse/DDLSemanticAnalyzer.java | 2 +- .../hadoop/hive/ql/parse/WindowingSpec.java | 2 +- .../hadoop/hive/ql/plan/AbstractVectorDesc.java | 4 +- .../apache/hadoop/hive/ql/plan/GroupByDesc.java | 2 +- .../apache/hadoop/hive/ql/plan/MapJoinDesc.java | 6 +- .../hadoop/hive/ql/plan/ReduceSinkDesc.java | 2 +- .../hive/ql/plan/VectorAppMasterEventDesc.java | 2 +- .../hadoop/hive/ql/plan/VectorFileSinkDesc.java | 2 +- .../hadoop/hive/ql/plan/VectorFilterDesc.java | 2 +- .../hadoop/hive/ql/plan/VectorGroupByDesc.java | 2 +- .../hadoop/hive/ql/plan/VectorLimitDesc.java| 2 +- .../hadoop/hive/ql/plan/VectorMapJoinDesc.java | 2 +- .../hadoop/hive/ql/plan/VectorMapJoinInfo.java | 2 +- .../hive/ql/plan/VectorPartitionDesc.java | 2 +- .../hive/ql/plan/VectorReduceSinkDesc.java | 2 +- .../hive/ql/plan/VectorReduceSinkInfo.java | 2 +- .../hadoop/hive/ql/plan/VectorSMBJoinDesc.java | 2 +- .../hadoop/hive/ql/plan/VectorSelectDesc.java | 2 +- .../ql/plan/VectorSparkHashTableSinkDesc.java | 2 +- .../VectorSparkPartitionPruningSinkDesc.java| 2 +- .../hive/ql/plan/VectorTableScanDesc.java | 2 +- .../hadoop/hive/ql/processors/HiveCommand.java | 2 +- .../udf/generic/GenericUDFInternalInterval.java | 4 +- .../mapjoin/fast/CheckFastRowHashMap.java | 17 ++--- .../mapjoin/fast/CommonFastHashTable.java | 4 +- .../apache/hadoop/hive/serde2/SerDeUtils.java | 2 +- .../hive/serde2/avro/AvroDeserializer.java | 2 +- 
.../lazy/fast/LazySimpleDeserializeRead.java| 4 +- .../hive/serde2/lazy/fast/StringToDouble.java | 4 +- .../hive/serde2/lazybinary/LazyBinaryUtils.java | 2 +- .../hive/serde2/typeinfo/TypeInfoUtils.java | 2 +- .../org/apache/hadoop/hive/io/HdfsUtils.java| 4 +- .../hive/io/HiveIOExceptionHandlerChain.java| 2 +- .../hive/io/HiveIOExceptionHandlerUtil.java | 4 +- .../apache/hadoop/hive/shims/ShimLoader.java| 2 +- .../hive/common/type/FastHiveDecimalImpl.java | 73 +--- .../hadoop/hive/common/type/RandomTypeUtil.java | 10 +-- .../hive/testutils/jdbc/HiveBurnInClient.java | 4 +- 67 files changed, 151 insertions(+), 243 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/71f4930d/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java -- diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java b/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java index 048d744..f85d8a3 100644
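Nearly every hunk in this patch is one of two mechanical fixes: a mutable static field becomes static final (an immutable value, published safely once), or it stops being static altogether (one copy per instance, confined to whichever thread owns the instance). The FastHiveDecimalImpl hunks above are the first kind; the UDFJson fix below (HIVE-16196) is mostly the second. The two safe shapes, side by side:

    import java.text.SimpleDateFormat;
    import java.util.regex.Pattern;

    class Unsafe {
      // Racy: a non-final static can be reassigned, and readers on other
      // threads may see a stale or partially published value.
      static Pattern digits = Pattern.compile("[0-9]+");
      // final but still unsafe to share: SimpleDateFormat is mutable inside.
      static final SimpleDateFormat FMT = new SimpleDateFormat("yyyy-MM-dd");
    }

    class Safe {
      // Immutable object + final field: freely shared across threads.
      static final Pattern DIGITS = Pattern.compile("[0-9]+");
      // Mutable object demoted to an instance field: no cross-thread sharing.
      private final SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd");
    }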
hive git commit: HIVE-16196: UDFJson having thread-safety issues (reviewed by Chao)
Repository: hive Updated Branches: refs/heads/master 0e62d3dcb -> 87be4b31c HIVE-16196: UDFJson having thread-safety issues (reviewed by Chao) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/87be4b31 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/87be4b31 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/87be4b31 Branch: refs/heads/master Commit: 87be4b31ce5abbe03ee8461a437c901b5ee9ed05 Parents: 0e62d3d Author: Xuefu Zhang Authored: Thu Mar 16 13:27:53 2017 -0700 Committer: Xuefu Zhang Committed: Thu Mar 16 13:27:53 2017 -0700 -- .../org/apache/hadoop/hive/ql/udf/UDFJson.java | 67 1 file changed, 41 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/87be4b31/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java index 2c42fae..0c54754 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java @@ -59,20 +59,14 @@ import org.codehaus.jackson.type.JavaType; + " [,] : Union operator\n" + " [start:end:step] : array slice operator\n") public class UDFJson extends UDF { - private final Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-\\:\\s]+).*"); - private final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]"); - - private static final JsonFactory JSON_FACTORY = new JsonFactory(); - static { -// Allows for unescaped ASCII control characters in JSON values -JSON_FACTORY.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS); -// Enabled to accept quoting of all character backslash qooting mechanism -JSON_FACTORY.enable(Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER); - } - private static final ObjectMapper MAPPER = new ObjectMapper(JSON_FACTORY); + private static final Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-\\:\\s]+).*"); + private static final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]"); private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class); private static final JavaType LIST_TYPE = TypeFactory.fromClass(List.class); + private final JsonFactory jsonFactory = new JsonFactory(); + private final ObjectMapper objectMapper = new ObjectMapper(jsonFactory); + // An LRU cache using a linked hash map static class HashCache extends LinkedHashMap { @@ -93,16 +87,18 @@ public class UDFJson extends UDF { } - static Map extractObjectCache = new HashCache(); - static Map pathExprCache = new HashCache(); - static Map> indexListCache = + Map extractObjectCache = new HashCache(); + Map pathExprCache = new HashCache(); + Map> indexListCache = new HashCache>(); - static Map mKeyGroup1Cache = new HashCache(); - static Map mKeyMatchesCache = new HashCache(); - - Text result = new Text(); + Map mKeyGroup1Cache = new HashCache(); + Map mKeyMatchesCache = new HashCache(); public UDFJson() { +// Allows for unescaped ASCII control characters in JSON values +jsonFactory.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS); +// Enabled to accept quoting of all character backslash qooting mechanism +jsonFactory.enable(Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER); } /** @@ -125,13 +121,13 @@ public class UDFJson extends UDF { * @return json string or null when an error happens. 
*/ public Text evaluate(String jsonString, String pathString) { - if (jsonString == null || jsonString.isEmpty() || pathString == null || pathString.isEmpty() || pathString.charAt(0) != '$') { return null; } int pathExprStart = 1; +boolean unknownType = pathString.equals("$"); boolean isRootArray = false; if (pathString.length() > 1) { @@ -155,23 +151,41 @@ public class UDFJson extends UDF { // Cache extractObject Object extractObject = extractObjectCache.get(jsonString); if (extractObject == null) { - JavaType javaType = isRootArray ? LIST_TYPE : MAP_TYPE; - try { -extractObject = MAPPER.readValue(jsonString, javaType); - } catch (Exception e) { -return null; + if (unknownType) { +try { + extractObject = objectMapper.readValue(jsonString, LIST_TYPE); +} catch (Exception e) { + // Ignore exception +} +if (extractObject == null) { + try { +extractObject = objectMapper.readValue(jsonString, MAP_TYPE); + } catch (Exception e) { +return null; + } +
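Two changes ride together in this patch: the shared static caches and the statically configured JsonFactory/ObjectMapper become per-instance state (UDF instances are not shared across threads, so instance confinement sidesteps the races), and get_json_object learns to probe an unknown root "$" as a list first, then as a map. The HashCache those caches are built on is the stock bounded-LRU idiom over LinkedHashMap, worth seeing on its own:

    import java.util.LinkedHashMap;
    import java.util.Map;

    // Bounded LRU map in the style of UDFJson's HashCache (capacity is illustrative).
    class LruCache<K, V> extends LinkedHashMap<K, V> {
      private static final int CAPACITY = 16;

      LruCache() {
        super(CAPACITY + 1, 0.75f, /* accessOrder = */ true);  // true: order by access, i.e. LRU
      }

      @Override
      protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
        return size() > CAPACITY;  // evict the least recently used entry
      }
    }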
hive git commit: HIVE-16156: FileSinkOperator should delete existing output target when renaming (Reviewed by Sergey)
Repository: hive Updated Branches: refs/heads/master 8dda898ba -> 76b65baa7 HIVE-16156: FileSinkOperator should delete existing output target when renaming (Reviewed by Sergey) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/76b65baa Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/76b65baa Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/76b65baa Branch: refs/heads/master Commit: 76b65baa7b5f5c0e5c1f99cf0621247f65fb0b00 Parents: 8dda898 Author: Xuefu Zhang Authored: Fri Mar 10 22:14:49 2017 -0800 Committer: Xuefu Zhang Committed: Fri Mar 10 22:14:49 2017 -0800 -- .../apache/hadoop/hive/ql/exec/FileSinkOperator.java| 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/76b65baa/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 3bbe92d..a9d03d0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -229,8 +229,18 @@ public class FileSinkOperator extends TerminalOperator implements } } if (needToRename && outPaths[idx] != null && !fs.rename(outPaths[idx], finalPaths[idx])) { -throw new HiveException("Unable to rename output from: " + +FileStatus fileStatus = FileUtils.getFileStatusOrNull(fs, finalPaths[idx]); +if (fileStatus != null) { + LOG.warn("Target path " + finalPaths[idx] + " with a size " + fileStatus.getLen() + " exists. Trying to delete it."); + if (!fs.delete(finalPaths[idx], true)) { +throw new HiveException("Unable to delete existing target output: " + finalPaths[idx]); + } +} + +if (!fs.rename(outPaths[idx], finalPaths[idx])) { + throw new HiveException("Unable to rename output from: " + outPaths[idx] + " to: " + finalPaths[idx]); +} } updateProgress(); } catch (IOException e) {
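FileSystem.rename() signals "destination already exists" by returning false rather than throwing, so a bare rename can silently lose to a leftover file from, say, a retried task attempt. The patch recovers by deleting the stale target and renaming once more. The same recover-and-retry shape against the stock Hadoop FileSystem API, with error handling collapsed to IOException:

    import java.io.IOException;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Sketch: move out -> dest, replacing a pre-existing dest if necessary.
    static void renameOrReplace(FileSystem fs, Path out, Path dest) throws IOException {
      if (fs.rename(out, dest)) {
        return;                                    // common case: plain rename worked
      }
      if (fs.exists(dest)) {
        // A stale target blocks the rename; remove it before retrying.
        if (!fs.delete(dest, true)) {
          throw new IOException("Unable to delete existing target output: " + dest);
        }
      }
      if (!fs.rename(out, dest)) {
        throw new IOException("Unable to rename output from: " + out + " to: " + dest);
      }
    }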
hive git commit: HIVE-15671: RPCServer.registerClient() erroneously uses server/client handshake timeout for connection timeout (reviewed by Jimmy)
Repository: hive Updated Branches: refs/heads/master 791066178 -> 401b14ac7 HIVE-15671: RPCServer.registerClient() erroneously uses server/client handshake timeout for connection timeout (reviewed by Jimmy) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/401b14ac Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/401b14ac Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/401b14ac Branch: refs/heads/master Commit: 401b14ac778ff58cbc5e76f08de002ea4edf3c57 Parents: 7910661 Author: Xuefu Zhang Authored: Mon Feb 13 11:08:53 2017 -0800 Committer: Xuefu Zhang Committed: Mon Feb 13 11:08:53 2017 -0800 -- .../src/main/java/org/apache/hive/spark/client/rpc/Rpc.java| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/401b14ac/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java -- diff --git a/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java b/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java index b2f133b..0489684 100644 --- a/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java +++ b/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java @@ -120,7 +120,7 @@ public class Rpc implements Closeable { } }; final ScheduledFuture timeoutFuture = eloop.schedule(timeoutTask, -rpcConf.getServerConnectTimeoutMs(), TimeUnit.MILLISECONDS); +connectTimeoutMs, TimeUnit.MILLISECONDS); // The channel listener instantiates the Rpc instance when the connection is established, // and initiates the SASL handshake.
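A one-line fix with a real behavioral change: the watchdog that fails a pending client connection was being armed with the server/client handshake timeout instead of the connection timeout, so clients were cut off on the wrong clock. The watchdog shape itself, restated with plain java.util.concurrent types rather than Netty's event loop:

    import java.util.concurrent.CompletableFuture;
    import java.util.concurrent.ScheduledExecutorService;
    import java.util.concurrent.ScheduledFuture;
    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.TimeoutException;

    // Fail connectPromise unless it completes within connectTimeoutMs.
    static ScheduledFuture<?> armWatchdog(ScheduledExecutorService scheduler,
        CompletableFuture<Void> connectPromise, long connectTimeoutMs) {
      ScheduledFuture<?> watchdog = scheduler.schedule(() -> {
        connectPromise.completeExceptionally(
            new TimeoutException("Timed out waiting for client connection."));
      }, connectTimeoutMs, TimeUnit.MILLISECONDS);
      // If the connection completes (or fails) first, disarm the watchdog.
      connectPromise.whenComplete((v, t) -> watchdog.cancel(false));
      return watchdog;
    }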
hive git commit: HIVE-15683: Make what's done in HIVE-15580 for group by configurable (reviewed by Chao)
Repository: hive Updated Branches: refs/heads/master 19a6831b9 -> 6c901fb3e HIVE-15683: Make what's done in HIVE-15580 for group by configurable (reviewed by Chao) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6c901fb3 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6c901fb3 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6c901fb3 Branch: refs/heads/master Commit: 6c901fb3e681edb76e3251996b14dac4ae092ce5 Parents: 19a6831 Author: Xuefu Zhang Authored: Wed Feb 8 14:58:19 2017 -0800 Committer: Xuefu Zhang Committed: Wed Feb 8 14:58:19 2017 -0800 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 ++ .../hive/ql/exec/spark/GroupByShuffler.java | 11 +++-- .../hive/ql/exec/spark/HiveReduceFunction.java | 10 ++--- .../spark/HiveReduceFunctionResultList.java | 18 ++--- .../hadoop/hive/ql/exec/spark/ReduceTran.java | 8 ++-- .../hive/ql/exec/spark/RepartitionShuffler.java | 42 .../hive/ql/exec/spark/SortByShuffler.java | 2 +- .../hive/ql/exec/spark/SparkPlanGenerator.java | 6 ++- .../hive/ql/exec/spark/SparkShuffler.java | 4 +- .../clientpositive/lateral_view_explode2.q | 4 +- .../clientpositive/spark/union_remove_25.q.out | 2 +- 11 files changed, 85 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/6c901fb3/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index f3b01b2..e82758f 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3228,6 +3228,10 @@ public class HiveConf extends Configuration { SPARK_DYNAMIC_PARTITION_PRUNING_MAX_DATA_SIZE( "hive.spark.dynamic.partition.pruning.max.data.size", 100*1024*1024L, "Maximum total data size in dynamic pruning."), +SPARK_USE_GROUPBY_SHUFFLE( +"hive.spark.use.groupby.shuffle", true, +"Spark groupByKey transformation has better performance but uses unbounded memory." 
+ +"Turn this off when there is a memory issue."), NWAYJOINREORDER("hive.reorder.nway.joins", true, "Runs reordering of tables within single n-way join (i.e.: picks streamtable)"), HIVE_MERGE_NWAY_JOINS("hive.merge.nway.joins", true, http://git-wip-us.apache.org/repos/asf/hive/blob/6c901fb3/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java index 8267515..9f9e3b2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java @@ -20,18 +20,17 @@ package org.apache.hadoop.hive.ql.exec.spark; import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.io.BytesWritable; -import org.apache.spark.HashPartitioner; import org.apache.spark.api.java.JavaPairRDD; -public class GroupByShuffler implements SparkShuffler { +public class GroupByShuffler implements SparkShuffler> { @Override - public JavaPairRDD shuffle( + public JavaPairRDD> shuffle( JavaPairRDD input, int numPartitions) { -if (numPartitions < 0) { - numPartitions = 1; +if (numPartitions > 0) { + return input.groupByKey(numPartitions); } -return input.repartitionAndSortWithinPartitions(new HashPartitioner(numPartitions)); +return input.groupByKey(); } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/6c901fb3/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java index 2b85872..2b6e2de 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java @@ -25,8 +25,8 @@ import org.apache.hadoop.io.BytesWritable; import scala.Tuple2; -public class HiveReduceFunction extends HivePairFlatMapFunction< - Iterator>, HiveKey, BytesWritable> { +public class HiveReduceFunction extends HivePairFlatMapFunction< + Iterator>, HiveKey, BytesWritable> { private static fi
hive git commit: HIVE-15682: Eliminate per-row based dummy iterator creation (reviewed by Chao)
Repository: hive Updated Branches: refs/heads/master 3e01ef326 -> 561dbe3b9

HIVE-15682: Eliminate per-row based dummy iterator creation (reviewed by Chao)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/561dbe3b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/561dbe3b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/561dbe3b

Branch: refs/heads/master
Commit: 561dbe3b90bc5cd85a64e22ccd9e384bbf67a782
Parents: 3e01ef3
Author: Xuefu Zhang
Authored: Tue Feb 7 13:48:55 2017 -0800
Committer: Xuefu Zhang
Committed: Tue Feb 7 13:48:55 2017 -0800
--
 .../ql/exec/spark/SparkReduceRecordHandler.java | 56
 1 file changed, 35 insertions(+), 21 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/hive/blob/561dbe3b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
index 44f2e4d..8251900 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
@@ -211,30 +211,44 @@ public class SparkReduceRecordHandler extends SparkRecordHandler {
   }

   /**
-   * TODO: Instead of creating a dummy iterator per row, we can implement a private method that's
-   * similar to processRow(Object key, Iterator values) but processes one row at a time. Then,
-   * we just call that private method here.
+   * A reusable dummy iterator that has only one value.
+   *
    */
-  @Override
-  public void processRow(Object key, final Object value) throws IOException {
-    processRow(key, new Iterator<Object>() {
-      boolean done = false;
-
-      @Override
-      public boolean hasNext() {
-        return !done;
-      }
-
-      @Override
-      public Object next() {
-        done = true;
-        return value;
-      }
-
-      @Override
-      public void remove() {
-        throw new UnsupportedOperationException("Iterator.remove() is not implemented/supported");
-      }
-    });
+  private static class DummyIterator implements Iterator<Object> {
+    private boolean done = false;
+    private Object value = null;
+
+    public void setValue(Object v) {
+      this.value = v;
+      done = false;
+    }
+
+    @Override
+    public boolean hasNext() {
+      return !done;
+    }
+
+    @Override
+    public Object next() {
+      done = true;
+      return value;
+    }
+
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException("Iterator.remove() is not implemented/supported");
+    }
+  }
+
+  private DummyIterator dummyIterator = new DummyIterator();
+
+  /**
+   * Process one row using a dummy iterator.
+   */
+  @Override
+  public void processRow(Object key, final Object value) throws IOException {
+    dummyIterator.setValue(value);
+    processRow(key, dummyIterator);
   }

   @Override
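The reuse trick above generalizes beyond Hive. A self-contained sketch of the same pattern with hypothetical names: one instance is reset per row instead of allocating a fresh anonymous iterator per row, and, like the handler above, it assumes single-threaded row processing:

import java.util.Iterator;

final class SingleValueIterator<T> implements Iterator<T> {
  private T value;
  private boolean done = true;

  // Re-arm the iterator for the next row; no new object is allocated.
  void reset(T v) {
    value = v;
    done = false;
  }

  @Override
  public boolean hasNext() {
    return !done;
  }

  @Override
  public T next() {
    done = true;
    T v = value;
    value = null; // release the row for GC once consumed
    return v;
  }

  @Override
  public void remove() {
    throw new UnsupportedOperationException("remove() not supported");
  }
}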
hive git commit: HIVE-15749: Add missing ASF headers (Peter via Xuefu)
Repository: hive Updated Branches: refs/heads/master 4a03fb1da -> 5c403e9fc HIVE-15749: Add missing ASF headers (Peter via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5c403e9f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5c403e9f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5c403e9f Branch: refs/heads/master Commit: 5c403e9fc0552559914079ca480eba8b856b7ee8 Parents: 4a03fb1 Author: Xuefu Zhang Authored: Wed Feb 1 13:51:59 2017 -0800 Committer: Xuefu Zhang Committed: Wed Feb 1 13:51:59 2017 -0800 -- .../hive/common/classification/RetrySemantics.java | 17 + .../hadoop/hive/druid/io/DruidRecordWriter.java| 17 + .../hive/jdbc/TestHivePreparedStatement.java | 17 + .../hive/llap/io/encoded/LineRrOffsetReader.java | 17 + .../hive/llap/io/encoded/PassThruOffsetReader.java | 17 + .../hadoop/hive/ql/parse/TestMergeStatement.java | 17 + .../apache/hadoop/hive/ql/plan/TestMapWork.java| 17 + 7 files changed, 119 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/5c403e9f/common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java -- diff --git a/common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java b/common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java index abad45e..5883b01 100644 --- a/common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java +++ b/common/src/java/org/apache/hadoop/hive/common/classification/RetrySemantics.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.hive.common.classification; http://git-wip-us.apache.org/repos/asf/hive/blob/5c403e9f/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java -- diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java index 1601a9a..3323cc0 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.hive.druid.io; import com.google.common.base.Function; http://git-wip-us.apache.org/repos/asf/hive/blob/5c403e9f/jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java -- diff --git a/jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java b/jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java index bc49aeb..2a68c91 100644 --- a/jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java +++ b/jdbc/src/test/org/apache/hive/jdbc/TestHivePreparedStatement.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agree
hive git commit: HIVE-15580: Eliminate unbounded memory usage for orderBy and groupBy in Hive on Spark (reviewed by Chao Sun)
Repository: hive Updated Branches: refs/heads/master f968cf78a -> 811b3e39e

HIVE-15580: Eliminate unbounded memory usage for orderBy and groupBy in Hive on Spark (reviewed by Chao Sun)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/811b3e39
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/811b3e39
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/811b3e39

Branch: refs/heads/master
Commit: 811b3e39ed569232c4f138c1287109ef8ebce132
Parents: f968cf7
Author: Xuefu Zhang
Authored: Fri Jan 20 12:56:49 2017 -0800
Committer: Xuefu Zhang
Committed: Fri Jan 20 12:56:49 2017 -0800
--
 .../hive/ql/exec/spark/GroupByShuffler.java     | 10 +--
 .../hive/ql/exec/spark/HiveReduceFunction.java  |  4 +-
 .../spark/HiveReduceFunctionResultList.java     |  8 +--
 .../hadoop/hive/ql/exec/spark/ReduceTran.java   |  4 +-
 .../hadoop/hive/ql/exec/spark/ShuffleTran.java  |  6 +-
 .../hive/ql/exec/spark/SortByShuffler.java      | 65 +---
 .../hive/ql/exec/spark/SparkPlanGenerator.java  |  7 ---
 .../ql/exec/spark/SparkReduceRecordHandler.java | 29 +++--
 .../hive/ql/exec/spark/SparkShuffler.java       |  2 +-
 .../queries/clientpositive/union_top_level.q    |  8 +--
 .../clientpositive/llap/union_top_level.q.out   | 52
 .../spark/lateral_view_explode2.q.out           |  2 +-
 .../clientpositive/spark/union_remove_25.q.out  |  2 +-
 .../clientpositive/spark/union_top_level.q.out  | 62 +--
 .../spark/vector_outer_join5.q.out              | 40 ++--
 15 files changed, 124 insertions(+), 177 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/hive/blob/811b3e39/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java
index e128dd2..8267515 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/GroupByShuffler.java
@@ -20,21 +20,23 @@ package org.apache.hadoop.hive.ql.exec.spark;

 import org.apache.hadoop.hive.ql.io.HiveKey;
 import org.apache.hadoop.io.BytesWritable;
+import org.apache.spark.HashPartitioner;
 import org.apache.spark.api.java.JavaPairRDD;

 public class GroupByShuffler implements SparkShuffler {

   @Override
-  public JavaPairRDD<HiveKey, Iterable<BytesWritable>> shuffle(
+  public JavaPairRDD<HiveKey, BytesWritable> shuffle(
       JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
-    if (numPartitions > 0) {
-      return input.groupByKey(numPartitions);
+    if (numPartitions < 0) {
+      numPartitions = 1;
     }
-    return input.groupByKey();
+    return input.repartitionAndSortWithinPartitions(new HashPartitioner(numPartitions));
   }

   @Override
   public String getName() {
     return "GroupBy";
   }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/811b3e39/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java
index eeb4443..2b85872 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunction.java
@@ -26,7 +26,7 @@ import org.apache.hadoop.io.BytesWritable;
 import scala.Tuple2;

 public class HiveReduceFunction extends HivePairFlatMapFunction<
-    Iterator<Tuple2<HiveKey, Iterable<BytesWritable>>>, HiveKey, BytesWritable> {
+    Iterator<Tuple2<HiveKey, BytesWritable>>, HiveKey, BytesWritable> {

   private static final long serialVersionUID = 1L;

@@ -37,7 +37,7 @@ public class HiveReduceFunction extends HivePairFlatMapFunction<
   public Iterator<Tuple2<HiveKey, BytesWritable>>
-      call(Iterator<Tuple2<HiveKey, Iterable<BytesWritable>>> it) throws Exception {
+      call(Iterator<Tuple2<HiveKey, BytesWritable>> it) throws Exception {
     initJobConf();

     SparkReduceRecordHandler reducerRecordhandler = new SparkReduceRecordHandler();

http://git-wip-us.apache.org/repos/asf/hive/blob/811b3e39/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunctionResultList.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunctionResultList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunctionResultList.java
index d57cac4..8708819 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunctionResultList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveReduceFunctionResultList.java
@@ -26,7 +26,7 @@ import org.apac
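The heart of this change is the swap inside GroupByShuffler. A standalone sketch of the trade-off, with types simplified to String/Integer (in Hive the key is HiveKey and the partition count comes from the plan):

import org.apache.spark.HashPartitioner;
import org.apache.spark.api.java.JavaPairRDD;

public class ShuffleContrast {
  // groupByKey materializes one (key, Iterable<V>) pair per key; a skewed key
  // means one executor must hold that key's entire value list at once.
  static JavaPairRDD<String, Iterable<Integer>> unbounded(JavaPairRDD<String, Integer> in) {
    return in.groupByKey(8);
  }

  // repartitionAndSortWithinPartitions ships values as individual records in
  // sorted order; the reducer walks the sorted run and detects key boundaries
  // itself, so memory use stays constant per row.
  static JavaPairRDD<String, Integer> bounded(JavaPairRDD<String, Integer> in) {
    return in.repartitionAndSortWithinPartitions(new HashPartitioner(8));
  }
}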
hive git commit: HIVE-15543: Don't try to get memory/cores to decide parallelism when Spark dynamic allocation is enabled (Reviewed by Rui)
Repository: hive Updated Branches: refs/heads/master 9e7d384f6 -> ccc9bf3ea HIVE-15543: Don't try to get memory/cores to decide parallelism when Spark dynamic allocation is enabled (Reviewed by Rui) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ccc9bf3e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ccc9bf3e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ccc9bf3e Branch: refs/heads/master Commit: ccc9bf3eaadadcbb3c93faa4a9ccc0e20c41dc28 Parents: 9e7d384 Author: Xuefu Zhang Authored: Thu Jan 5 10:56:02 2017 -0800 Committer: Xuefu Zhang Committed: Thu Jan 5 10:56:02 2017 -0800 -- .../spark/SetSparkReducerParallelism.java | 56 1 file changed, 33 insertions(+), 23 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/ccc9bf3e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java index ff4924d..7a5b71f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java @@ -53,6 +53,8 @@ public class SetSparkReducerParallelism implements NodeProcessor { private static final Logger LOG = LoggerFactory.getLogger(SetSparkReducerParallelism.class.getName()); + private static final String SPARK_DYNAMIC_ALLOCATION_ENABLED = "spark.dynamicAllocation.enabled"; + // Spark memory per task, and total number of cores private ObjectPair sparkMemoryAndCores; @@ -109,34 +111,12 @@ public class SetSparkReducerParallelism implements NodeProcessor { } } -if (sparkMemoryAndCores == null) { - SparkSessionManager sparkSessionManager = null; - SparkSession sparkSession = null; - try { -sparkSessionManager = SparkSessionManagerImpl.getInstance(); -sparkSession = SparkUtilities.getSparkSession( - context.getConf(), sparkSessionManager); -sparkMemoryAndCores = sparkSession.getMemoryAndCores(); - } catch (HiveException e) { -throw new SemanticException("Failed to get a spark session: " + e); - } catch (Exception e) { -LOG.warn("Failed to get spark memory/core info", e); - } finally { -if (sparkSession != null && sparkSessionManager != null) { - try { -sparkSessionManager.returnSession(sparkSession); - } catch (HiveException ex) { -LOG.error("Failed to return the session to SessionManager: " + ex, ex); - } -} - } -} - // Divide it by 2 so that we can have more reducers long bytesPerReducer = context.getConf().getLongVar(HiveConf.ConfVars.BYTESPERREDUCER) / 2; int numReducers = Utilities.estimateReducers(numberOfBytes, bytesPerReducer, maxReducers, false); +getSparkMemoryAndCores(context); if (sparkMemoryAndCores != null && sparkMemoryAndCores.getFirst() > 0 && sparkMemoryAndCores.getSecond() > 0) { // warn the user if bytes per reducer is much larger than memory per task @@ -184,4 +164,34 @@ public class SetSparkReducerParallelism implements NodeProcessor { return false; } + private void getSparkMemoryAndCores(OptimizeSparkProcContext context) throws SemanticException { +if (context.getConf().getBoolean(SPARK_DYNAMIC_ALLOCATION_ENABLED, false)) { + // If dynamic allocation is enabled, numbers for memory and cores are meaningless. So, we don't + // try to get it. 
+ sparkMemoryAndCores = null; + return; +} + +SparkSessionManager sparkSessionManager = null; +SparkSession sparkSession = null; +try { + sparkSessionManager = SparkSessionManagerImpl.getInstance(); + sparkSession = SparkUtilities.getSparkSession( + context.getConf(), sparkSessionManager); + sparkMemoryAndCores = sparkSession.getMemoryAndCores(); +} catch (HiveException e) { + throw new SemanticException("Failed to get a spark session: " + e); +} catch (Exception e) { + LOG.warn("Failed to get spark memory/core info", e); +} finally { + if (sparkSession != null && sparkSessionManager != null) { +try { + sparkSessionManager.returnSession(sparkSession); +} catch (HiveException ex) { + LOG.error("Failed to return the session to SessionManager: " + ex, ex); +} + } +} + } + }
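The guard itself is small. A self-contained restatement of the decision rule, using only the standard spark.dynamicAllocation.enabled property (the helper name is illustrative):

import org.apache.hadoop.conf.Configuration;

public class DynamicAllocationCheck {
  private static final String SPARK_DYNAMIC_ALLOCATION_ENABLED =
      "spark.dynamicAllocation.enabled";

  // With dynamic allocation the executor count changes while the query runs,
  // so a memory/cores snapshot taken at compile time would be stale; skip the
  // probe and let the byte-based reducer estimate stand on its own.
  static boolean shouldProbeClusterCapacity(Configuration conf) {
    return !conf.getBoolean(SPARK_DYNAMIC_ALLOCATION_ENABLED, false);
  }
}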
hive git commit: HIVE-15528: Expose Spark job error in SparkTask (Zhihai via Xuefu)
Repository: hive Updated Branches: refs/heads/master 5d45974e9 -> c928ad3d3 HIVE-15528: Expose Spark job error in SparkTask (Zhihai via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c928ad3d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c928ad3d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c928ad3d Branch: refs/heads/master Commit: c928ad3d3f958d1e2e109b689fc5c6e9ee3e619b Parents: 5d45974 Author: Xuefu Zhang Authored: Tue Jan 3 10:39:39 2017 -0800 Committer: Xuefu Zhang Committed: Tue Jan 3 10:39:39 2017 -0800 -- .../org/apache/hadoop/hive/ql/exec/spark/SparkTask.java | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/c928ad3d/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java index f836065..87d80a3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java @@ -114,7 +114,7 @@ public class SparkTask extends Task { this.jobID = jobRef.getSparkJobStatus().getAppID(); rc = jobRef.monitorJob(); SparkJobStatus sparkJobStatus = jobRef.getSparkJobStatus(); - getSparkJobInfo(sparkJobStatus); + getSparkJobInfo(sparkJobStatus, rc); if (rc == 0) { sparkStatistics = sparkJobStatus.getSparkStatistics(); if (LOG.isInfoEnabled() && sparkStatistics != null) { @@ -139,6 +139,7 @@ public class SparkTask extends Task { // org.apache.commons.lang.StringUtils console.printError(msg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); LOG.error(msg, e); + setException(e); rc = 1; } finally { startTime = perfLogger.getEndTime(PerfLogger.SPARK_SUBMIT_TO_RUNNING); @@ -196,6 +197,7 @@ public class SparkTask extends Task { String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'"; console.printError(mesg, "\n" + StringUtils.stringifyException(e)); +setException(e); } } return rc; @@ -330,7 +332,7 @@ public class SparkTask extends Task { return counters; } - private void getSparkJobInfo(SparkJobStatus sparkJobStatus) { + private void getSparkJobInfo(SparkJobStatus sparkJobStatus, int rc) { try { stageIds = new ArrayList(); int[] ids = sparkJobStatus.getStageIds(); @@ -355,6 +357,12 @@ public class SparkTask extends Task { succeededTaskCount = sumComplete; totalTaskCount = sumTotal; failedTaskCount = sumFailed; + if (rc != 0) { +Throwable error = sparkJobStatus.getError(); +if (error != null) { + setException(error); +} + } } catch (Exception e) { LOG.error("Failed to get Spark job information", e); }
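With this patch a failed SparkTask carries the Spark-side Throwable via setException. A hypothetical caller-side helper showing how that surfaces (the invocation style is assumed, not taken from this commit):

import org.apache.hadoop.hive.ql.exec.Task;

public final class SparkTaskErrors {
  // Turn a failed task's stored error into an exception with the real cause,
  // instead of reporting only a bare non-zero return code.
  static void rethrowIfFailed(Task<?> task, int rc) {
    if (rc == 0) {
      return;
    }
    Throwable cause = task.getException(); // populated by SparkTask on failure
    throw new RuntimeException(
        cause != null ? "Spark job failed: " + cause.getMessage() : "Spark job failed, rc=" + rc,
        cause);
  }
}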
hive git commit: HIVE-14617: NPE in UDF MapValues() if input is null (reviewed by Chao)
Repository: hive Updated Branches: refs/heads/master 9343fee5d -> 2f686d4c0 HIVE-14617: NPE in UDF MapValues() if input is null (reviewed by Chao) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2f686d4c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2f686d4c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2f686d4c Branch: refs/heads/master Commit: 2f686d4c0c20540079660de202c619e42ed5cd4f Parents: 9343fee Author: Xuefu Zhang Authored: Thu Aug 25 11:05:25 2016 -0700 Committer: Xuefu Zhang Committed: Thu Aug 25 11:05:25 2016 -0700 -- .../ql/udf/generic/GenericUDFMapValues.java | 6 ++- .../ql/udf/generic/TestGenericUDFMapValues.java | 56 2 files changed, 61 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/2f686d4c/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java index 096ceac..3bd5864 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMapValues.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf.generic; import java.util.ArrayList; +import java.util.Map; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; @@ -61,7 +62,10 @@ public class GenericUDFMapValues extends GenericUDF { public Object evaluate(DeferredObject[] arguments) throws HiveException { retArray.clear(); Object mapObj = arguments[0].get(); -retArray.addAll(mapOI.getMap(mapObj).values()); +Map map = mapOI.getMap(mapObj); +if (map != null) { + retArray.addAll(map.values()); +} return retArray; } http://git-wip-us.apache.org/repos/asf/hive/blob/2f686d4c/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMapValues.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMapValues.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMapValues.java new file mode 100644 index 000..44676ed --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMapValues.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import java.io.IOException; +import java.util.Map; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.junit.Assert; +import org.junit.Test; + +public class TestGenericUDFMapValues { + + @Test + public void testNullMap() throws HiveException, IOException { +ObjectInspector[] inputOIs = { +ObjectInspectorFactory.getStandardMapObjectInspector( +PrimitiveObjectInspectorFactory.writableStringObjectInspector, +PrimitiveObjectInspectorFactory.writableStringObjectInspector), +}; + +Map input = null; +DeferredObject[] args = { +new DeferredJavaObject(input) +}; + + GenericUDFMapValues udf = new GenericUDFMapValues(); +StandardListObjectInspector oi = (StandardListObjectInspector) udf.initialize(inputOIs); +Object res = udf.evaluate(args); +Assert.assertTrue(oi.getList(res).isEmpty()); +udf.close(); + } + +}
svn commit: r1733688 - /hive/cms/trunk/content/people.mdtext
Author: xuefu
Date: Sat Mar 5 04:28:38 2016
New Revision: 1733688

URL: http://svn.apache.org/viewvc?rev=1733688&view=rev
Log: Update Xuefu's information

Modified: hive/cms/trunk/content/people.mdtext

Modified: hive/cms/trunk/content/people.mdtext
URL: http://svn.apache.org/viewvc/hive/cms/trunk/content/people.mdtext?rev=1733688&r1=1733687&r2=1733688&view=diff
==
--- hive/cms/trunk/content/people.mdtext (original)
+++ hive/cms/trunk/content/people.mdtext Sat Mar 5 04:28:38 2016
@@ -214,9 +214,9 @@ tr:nth-child(2n+1) {

-xuefuz
+xuefu
 Xuefu Zhang
-<a href="http://cloudera.com/">Cloudera</a>
+
hive git commit: HIVE-13101: NullPointerException in HiveLexer.g (Sandeep via Xuefu)
Repository: hive Updated Branches: refs/heads/master bc4dcf376 -> e9b734852 HIVE-13101: NullPointerException in HiveLexer.g (Sandeep via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e9b73485 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e9b73485 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e9b73485 Branch: refs/heads/master Commit: e9b73485281730abf73b35d9029000edd42fa35c Parents: bc4dcf3 Author: Xuefu Zhang Authored: Wed Feb 24 15:50:47 2016 -0800 Committer: Xuefu Zhang Committed: Wed Feb 24 15:50:47 2016 -0800 -- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g | 3 +++ 1 file changed, 3 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/e9b73485/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 4c4470b..3f92d16 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -31,6 +31,9 @@ import org.apache.hadoop.hive.conf.HiveConf; } protected boolean allowQuotedId() { +if(hiveConf == null){ + return false; +} String supportedQIds = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT); return !"none".equals(supportedQIds); }
hive git commit: HIVE-13045: move guava dependency back to 14 after HIVE-12952 (Mohit via Xuefu)
Repository: hive Updated Branches: refs/heads/master cc8cec235 -> fd59191be HIVE-13045: move guava dependency back to 14 after HIVE-12952 (Mohit via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fd59191b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fd59191b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fd59191b Branch: refs/heads/master Commit: fd59191be047a980dec704a2a1e764fd22d22936 Parents: cc8cec2 Author: Xuefu Zhang Authored: Fri Feb 19 15:02:40 2016 -0800 Committer: Xuefu Zhang Committed: Fri Feb 19 15:02:40 2016 -0800 -- pom.xml| 2 +- .../org/apache/hive/service/cli/operation/OperationManager.java| 1 - 2 files changed, 1 insertion(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/fd59191b/pom.xml -- diff --git a/pom.xml b/pom.xml index af2e3d1..836e397 100644 --- a/pom.xml +++ b/pom.xml @@ -129,7 +129,7 @@ 1.4 10.10.2.0 3.1.0 -15.0 +14.0.1 2.4.4 2.6.0 ${basedir}/${hive.path.to.root}/testutils/hadoop http://git-wip-us.apache.org/repos/asf/hive/blob/fd59191b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java -- diff --git a/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java index 96c01de..1b8aca9 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java +++ b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java @@ -28,7 +28,6 @@ import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import com.google.common.collect.EvictingQueue; import org.apache.hadoop.hive.common.metrics.common.Metrics; import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
hive git commit: HIVE-13057: Remove duplicate copies of TableDesc property values in PartitionDesc (Mohit via Xuefu)
Repository: hive Updated Branches: refs/heads/master 487cf1595 -> cc8cec235 HIVE-13057: Remove duplicate copies of TableDesc property values in PartitionDesc (Mohit via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cc8cec23 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cc8cec23 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cc8cec23 Branch: refs/heads/master Commit: cc8cec2355f70cc066bb877dd700ac15f7a8e763 Parents: 487cf15 Author: Xuefu Zhang Authored: Fri Feb 19 14:59:21 2016 -0800 Committer: Xuefu Zhang Committed: Fri Feb 19 14:59:21 2016 -0800 -- .../org/apache/hadoop/hive/ql/plan/PartitionDesc.java | 13 - 1 file changed, 12 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/cc8cec23/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java index b032349..4d627ef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeUtils; @@ -83,7 +84,7 @@ public class PartitionDesc implements Serializable, Cloneable { } public PartitionDesc(final Partition part) throws HiveException { -PartitionDescConstructorHelper(part, Utilities.getTableDesc(part.getTable()), true); +PartitionDescConstructorHelper(part, getTableDesc(part.getTable()), true); setProperties(part.getMetadataFromPartitionSchema()); } @@ -209,7 +210,17 @@ public class PartitionDesc implements Serializable, Cloneable { } public void setProperties(final Properties properties) { +internProperties(properties); this.properties = properties; + } + + private static TableDesc getTableDesc(Table table) { +TableDesc tableDesc = Utilities.getTableDesc(table); +internProperties(tableDesc.getProperties()); +return tableDesc; + } + + private static void internProperties(Properties properties) { for (Enumeration keys = properties.propertyNames(); keys.hasMoreElements();) { String key = (String) keys.nextElement(); String oldValue = properties.getProperty(key);
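The hunk above is cut off mid-loop. A sketch of the interning step it performs, assuming plain String.intern (the committed patch may differ in detail): re-pointing each value at the JVM's canonical copy means the many PartitionDescs that share one TableDesc no longer hold duplicate copies of identical property strings.

import java.util.Enumeration;
import java.util.Properties;

public final class PropertyInterning {
  // Replace every property value with its canonical interned instance so
  // repeated values across thousands of partitions share one String object.
  static void internProperties(Properties properties) {
    for (Enumeration<?> keys = properties.propertyNames(); keys.hasMoreElements();) {
      String key = (String) keys.nextElement();
      String oldValue = properties.getProperty(key);
      if (oldValue != null) {
        properties.setProperty(key, oldValue.intern());
      }
    }
  }
}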
hive git commit: HIVE-12205: Unify metric collection for local and remote spark client. (Chinna via Chengxiang)
Repository: hive Updated Branches: refs/heads/master a6d9bf76e -> 9829f9985 HIVE-12205: Unify metric collection for local and remote spark client. (Chinna via Chengxiang) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9829f998 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9829f998 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9829f998 Branch: refs/heads/master Commit: 9829f9985c48742a070b0f09889d8d74d24b5553 Parents: a6d9bf7 Author: chengxiang Authored: Wed Feb 17 18:36:51 2016 +0800 Committer: Xuefu Zhang Committed: Wed Feb 17 06:34:10 2016 -0800 -- .../spark/status/impl/LocalSparkJobStatus.java | 94 +++- .../spark/status/impl/RemoteSparkJobStatus.java | 35 +--- .../exec/spark/status/impl/SparkJobUtils.java | 56 .../hive/spark/client/MetricsCollection.java| 2 +- 4 files changed, 73 insertions(+), 114 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/9829f998/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/LocalSparkJobStatus.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/LocalSparkJobStatus.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/LocalSparkJobStatus.java index 3c15521..d4819d9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/LocalSparkJobStatus.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/impl/LocalSparkJobStatus.java @@ -28,6 +28,8 @@ import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatistics; import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticsBuilder; import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobStatus; import org.apache.hadoop.hive.ql.exec.spark.status.SparkStageProgress; +import org.apache.hive.spark.client.MetricsCollection; +import org.apache.hive.spark.client.metrics.Metrics; import org.apache.hive.spark.counter.SparkCounters; import org.apache.spark.JobExecutionStatus; import org.apache.spark.SparkJobInfo; @@ -135,7 +137,18 @@ public class LocalSparkJobStatus implements SparkJobStatus { return null; } -Map flatJobMetric = combineJobLevelMetrics(jobMetric); +MetricsCollection metricsCollection = new MetricsCollection(); +Set stageIds = jobMetric.keySet(); +for (String stageId : stageIds) { + List taskMetrics = jobMetric.get(stageId); + for (TaskMetrics taskMetric : taskMetrics) { +Metrics metrics = new Metrics(taskMetric); +metricsCollection.addMetrics(jobId, Integer.parseInt(stageId), 0, metrics); + } +} +SparkJobUtils sparkJobUtils = new SparkJobUtils(); +Map flatJobMetric = sparkJobUtils.collectMetrics(metricsCollection +.getAllMetrics()); for (Map.Entry entry : flatJobMetric.entrySet()) { sparkStatisticsBuilder.add(jobIdentifier, entry.getKey(), Long.toString(entry.getValue())); } @@ -153,85 +166,6 @@ public class LocalSparkJobStatus implements SparkJobStatus { } } - private Map combineJobLevelMetrics(Map> jobMetric) { -Map results = Maps.newLinkedHashMap(); - -long executorDeserializeTime = 0; -long executorRunTime = 0; -long resultSize = 0; -long jvmGCTime = 0; -long resultSerializationTime = 0; -long memoryBytesSpilled = 0; -long diskBytesSpilled = 0; -long bytesRead = 0; -long remoteBlocksFetched = 0; -long localBlocksFetched = 0; -long fetchWaitTime = 0; -long remoteBytesRead = 0; -long shuffleBytesWritten = 0; -long shuffleWriteTime = 0; -boolean inputMetricExist = false; -boolean shuffleReadMetricExist = false; -boolean shuffleWriteMetricExist = false; - -for (List stageMetric : jobMetric.values()) { - if 
(stageMetric != null) { -for (TaskMetrics taskMetrics : stageMetric) { - if (taskMetrics != null) { -executorDeserializeTime += taskMetrics.executorDeserializeTime(); -executorRunTime += taskMetrics.executorRunTime(); -resultSize += taskMetrics.resultSize(); -jvmGCTime += taskMetrics.jvmGCTime(); -resultSerializationTime += taskMetrics.resultSerializationTime(); -memoryBytesSpilled += taskMetrics.memoryBytesSpilled(); -diskBytesSpilled += taskMetrics.diskBytesSpilled(); -if (!taskMetrics.inputMetrics().isEmpty()) { - inputMetricExist = true; - bytesRead += taskMetrics.inputMetrics().get().bytesRead(); -} -Option shuffleReadMetricsOption = taskMetrics.shuffleReadMetrics(); -if (!shuffleReadMetricsOption.isEmpty()) { - shuffleReadMetricExist
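The removed combineJobLevelMetrics (truncated above) kept one hand-written accumulator per metric; the patch folds per-task metrics through MetricsCollection and SparkJobUtils instead. A simplified sketch of that shape, with invented counter names:

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public final class MetricsRollup {
  // Illustrative stand-in for Spark's per-task counters (fields invented).
  static final class TaskCounters {
    long executorRunTime;
    long bytesRead;
  }

  // Fold every task's counters, across all stages, into one flat job-level
  // map, so local and remote clients can share the same aggregation path.
  static Map<String, Long> collect(Map<String, List<TaskCounters>> byStage) {
    Map<String, Long> totals = new LinkedHashMap<>();
    for (List<TaskCounters> stage : byStage.values()) {
      for (TaskCounters t : stage) {
        totals.merge("ExecutorRunTime", t.executorRunTime, Long::sum);
        totals.merge("BytesRead", t.bytesRead, Long::sum);
      }
    }
    return totals;
  }
}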
hive git commit: HIVE-12951: Reduce Spark executor prewarm timeout to 5s (reviewed by Rui)
Repository: hive Updated Branches: refs/heads/master 16fb16d52 -> 2bc0aed07

HIVE-12951: Reduce Spark executor prewarm timeout to 5s (reviewed by Rui)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2bc0aed0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2bc0aed0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2bc0aed0

Branch: refs/heads/master
Commit: 2bc0aed077b29a7c6cb55c0deab5ebe8234abf71
Parents: 16fb16d
Author: xzhang
Authored: Thu Feb 4 10:44:41 2016 -0800
Committer: xzhang
Committed: Thu Feb 4 10:44:41 2016 -0800
--
 .../hadoop/hive/ql/exec/spark/RemoteHiveSparkClient.java | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/hive/blob/2bc0aed0/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/RemoteHiveSparkClient.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/RemoteHiveSparkClient.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/RemoteHiveSparkClient.java
index 11e7116..30e53d2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/RemoteHiveSparkClient.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/RemoteHiveSparkClient.java
@@ -72,7 +72,7 @@ public class RemoteHiveSparkClient implements HiveSparkClient {
   private static final String MR_JAR_PROPERTY = "tmpjars";
   private static final transient Logger LOG = LoggerFactory.getLogger(RemoteHiveSparkClient.class);
-  private static final long MAX_PREWARM_TIME = 30000; // 30s
+  private static final long MAX_PREWARM_TIME = 5000; // 5s
   private static final transient Splitter CSV_SPLITTER = Splitter.on(",").omitEmptyStrings();

   private transient Map<String, String> conf;
@@ -115,10 +115,11 @@ public class RemoteHiveSparkClient implements HiveSparkClient {
           LOG.info("Finished prewarming Spark executors. The current number of executors is " + curExecutors);
           return;
         }
-        Thread.sleep(1000); // sleep 1 second
+        Thread.sleep(500); // sleep half a second
       } while (System.currentTimeMillis() - ts < MAX_PREWARM_TIME);

-      LOG.info("Timeout (60s) occurred while prewarming executors. The current number of executors is " + curExecutors);
+      LOG.info("Timeout (" + MAX_PREWARM_TIME +
+          "s) occurred while prewarming executors. The current number of executors is " + curExecutors);
     }
   }
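One unit quirk worth noting: MAX_PREWARM_TIME is in milliseconds, so the new message concatenates "5000" with an "s" suffix and prints "Timeout (5000s)". A unit-consistent variant of the log line, as a sketch:

public class PrewarmTimeoutLog {
  private static final long MAX_PREWARM_TIME = 5000; // milliseconds, as in the patch
  private static final int curExecutors = 0;         // stand-in for the real count

  public static void main(String[] args) {
    // Divide by 1000 so the printed number matches the "s" unit label.
    System.out.println("Timeout (" + (MAX_PREWARM_TIME / 1000)
        + "s) occurred while prewarming executors. The current number of executors is "
        + curExecutors);
  }
}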
hive git commit: HIVE-12888: TestSparkNegativeCliDriver does not run in Spark mode[Spark Branch] (Chengxiang via Xuefu)
Repository: hive Updated Branches: refs/heads/spark 8e0a10c82 -> e07826041 HIVE-12888: TestSparkNegativeCliDriver does not run in Spark mode[Spark Branch] (Chengxiang via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e0782604 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e0782604 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e0782604 Branch: refs/heads/spark Commit: e07826041e0326228ab4eeeaebe46625bbac3c99 Parents: 8e0a10c Author: Xuefu Zhang Authored: Tue Jan 26 19:31:49 2016 -0800 Committer: Xuefu Zhang Committed: Tue Jan 26 19:31:49 2016 -0800 -- ql/src/test/templates/TestNegativeCliDriver.vm | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/e0782604/ql/src/test/templates/TestNegativeCliDriver.vm -- diff --git a/ql/src/test/templates/TestNegativeCliDriver.vm b/ql/src/test/templates/TestNegativeCliDriver.vm index 5f8ee8e..2ea476f 100644 --- a/ql/src/test/templates/TestNegativeCliDriver.vm +++ b/ql/src/test/templates/TestNegativeCliDriver.vm @@ -41,13 +41,17 @@ public class $className extends TestCase { static { MiniClusterType miniMR = MiniClusterType.valueForString("$clusterMode"); +String hiveConfDir = "$hiveConfDir"; String initScript = "$initScript"; String cleanupScript = "$cleanupScript"; try { String hadoopVer = "$hadoopVersion"; - qt = new QTestUtil((HIVE_ROOT + "$resultsDir"), (HIVE_ROOT + "$logDir"), miniMR, hadoopVer, - initScript, cleanupScript); + if (!hiveConfDir.isEmpty()) { +hiveConfDir = HIVE_ROOT + hiveConfDir; + } + qt = new QTestUtil((HIVE_ROOT + "$resultsDir"), (HIVE_ROOT + "$logDir"), miniMR, + hiveConfDir, hadoopVer, initScript, cleanupScript); // do a one time initialization qt.cleanUp(); qt.createSources();
hive git commit: HIVE-12708: Hive on Spark doesn't work with Kerboresed HBase [Spark Branch] (reviewed by Szehon)
Repository: hive Updated Branches: refs/heads/spark 9af0b27bd -> a116e96b7 HIVE-12708: Hive on Spark doesn't work with Kerboresed HBase [Spark Branch] (reviewed by Szehon) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a116e96b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a116e96b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a116e96b Branch: refs/heads/spark Commit: a116e96b75998b5e8632c46678cd94c551fba78a Parents: 9af0b27 Author: Xuefu Zhang Authored: Fri Dec 18 14:37:03 2015 -0800 Committer: Xuefu Zhang Committed: Fri Dec 18 14:37:03 2015 -0800 -- .../hive/ql/exec/spark/HiveSparkClientFactory.java | 11 +++ 1 file changed, 11 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/a116e96b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java index ec0fdea..9b2dce3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java @@ -30,6 +30,7 @@ import org.apache.commons.compress.utils.CharsetNames; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.io.HiveKey; @@ -67,6 +68,7 @@ public class HiveSparkClientFactory { public static Map initiateSparkConf(HiveConf hiveConf) { Map sparkConf = new HashMap(); +HBaseConfiguration.addHbaseResources(hiveConf); // set default spark configurations. sparkConf.put("spark.master", SPARK_DEFAULT_MASTER); @@ -139,7 +141,16 @@ public class HiveSparkClientFactory { if (value != null && !value.isEmpty()) { sparkConf.put("spark.hadoop." + propertyName, value); } + } else if (propertyName.startsWith("hbase")) { +// Add HBase related configuration to Spark because in security mode, Spark needs it +// to generate hbase delegation token for Spark. This is a temp solution to deal with +// Spark problem. +String value = hiveConf.get(propertyName); +sparkConf.put("spark.hadoop." + propertyName, value); +LOG.info(String.format( + "load HBase configuration (%s -> %s).", propertyName, value)); } + if (RpcConfiguration.HIVE_SPARK_RSC_CONFIGS.contains(propertyName)) { String value = RpcConfiguration.getValue(hiveConf, propertyName); sparkConf.put(propertyName, value);
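The prefix works because Spark strips "spark.hadoop." from any such key and merges the remainder into the Hadoop Configuration it builds on the driver and executors, which is what lets the driver mint HBase delegation tokens. A generic sketch of the forwarding pattern (helper name illustrative):

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;

public final class PrefixForwarding {
  // Forward every config key matching a prefix (e.g. "hbase") into Spark's
  // namespace; Spark later strips "spark.hadoop." and applies the rest to its
  // Hadoop Configuration.
  static Map<String, String> forward(Configuration src, String keyPrefix) {
    Map<String, String> out = new HashMap<String, String>();
    for (Map.Entry<String, String> e : src) {
      if (e.getKey().startsWith(keyPrefix)) {
        out.put("spark.hadoop." + e.getKey(), e.getValue());
      }
    }
    return out;
  }
}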
hive git commit: HIVE-12538: After set spark related config, SparkSession never get reused (Nemon Lou via Xuefu)
Repository: hive Updated Branches: refs/heads/master 09b6f9a36 -> 305b8ce40 HIVE-12538: After set spark related config, SparkSession never get reused (Nemon Lou via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/305b8ce4 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/305b8ce4 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/305b8ce4 Branch: refs/heads/master Commit: 305b8ce4097a692a2ee718b1df384d98d1e6fc1a Parents: 09b6f9a Author: Xuefu Zhang Authored: Wed Dec 16 08:31:27 2015 -0800 Committer: Xuefu Zhang Committed: Wed Dec 16 08:31:27 2015 -0800 -- .../java/org/apache/hadoop/hive/conf/HiveConf.java| 4 +++- .../org/apache/hadoop/hive/conf/TestHiveConf.java | 14 ++ .../hadoop/hive/ql/exec/spark/SparkUtilities.java | 6 +- 3 files changed, 22 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/305b8ce4/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 243f281..b5aee00 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2836,7 +2836,9 @@ public class HiveConf extends Configuration { // When either name or value is null, the set method below will fail, // and throw IllegalArgumentException set(name, value); - isSparkConfigUpdated = isSparkRelatedConfig(name); + if (isSparkRelatedConfig(name)) { +isSparkConfigUpdated = true; + } } } http://git-wip-us.apache.org/repos/asf/hive/blob/305b8ce4/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java -- diff --git a/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java b/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java index 3b7a525..cd472c7 100644 --- a/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java +++ b/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java @@ -142,4 +142,18 @@ public class TestHiveConf { Assert.assertEquals("", conf2.get(HiveConf.ConfVars.METASTOREPWD.varname)); Assert.assertEquals("", conf2.get(HiveConf.ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PASSWORD.varname)); } + + @Test + public void testSparkConfigUpdate(){ +HiveConf conf = new HiveConf(); +Assert.assertFalse(conf.getSparkConfigUpdated()); + +conf.verifyAndSet("spark.master", "yarn-cluster"); +Assert.assertTrue(conf.getSparkConfigUpdated()); +conf.verifyAndSet("hive.execution.engine", "spark"); +Assert.assertTrue("Expected spark config updated.", conf.getSparkConfigUpdated()); + +conf.setSparkConfigUpdated(false); +Assert.assertFalse(conf.getSparkConfigUpdated()); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/305b8ce4/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java index 0268469..a61cdc5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkUtilities.java @@ -121,12 +121,16 @@ public class SparkUtilities { public static SparkSession getSparkSession(HiveConf conf, SparkSessionManager sparkSessionManager) throws HiveException { SparkSession sparkSession = SessionState.get().getSparkSession(); +HiveConf sessionConf = SessionState.get().getConf(); // Spark configurations are updated close the existing session -if 
(conf.getSparkConfigUpdated()) { +// In case of async queries or confOverlay is not empty, +// sessionConf and conf are different objects +if (sessionConf.getSparkConfigUpdated() || conf.getSparkConfigUpdated()) { sparkSessionManager.closeSession(sparkSession); sparkSession = null; conf.setSparkConfigUpdated(false); + sessionConf.setSparkConfigUpdated(false); } sparkSession = sparkSessionManager.getSession(sparkSession, conf, true); SessionState.get().setSparkSession(sparkSession);
hive git commit: HIVE-12568: Provide an option to specify network interface used by Spark remote client [Spark Branch] (reviewed by Jimmy)
Repository: hive Updated Branches: refs/heads/spark e4b8cf43c -> 9af0b27bd HIVE-12568: Provide an option to specify network interface used by Spark remote client [Spark Branch] (reviewed by Jimmy) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9af0b27b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9af0b27b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9af0b27b Branch: refs/heads/spark Commit: 9af0b27bda6352eb229058db57a25fe65eb81f9a Parents: e4b8cf4 Author: xzhang Authored: Mon Dec 7 11:10:25 2015 -0800 Committer: xzhang Committed: Mon Dec 7 11:10:25 2015 -0800 -- .../apache/hadoop/hive/common/ServerUtils.java | 19 +++ .../org/apache/hadoop/hive/conf/HiveConf.java | 5 ++ .../service/cli/thrift/ThriftCLIService.java| 15 +++--- .../hive/spark/client/rpc/RpcConfiguration.java | 57 +++- 4 files changed, 50 insertions(+), 46 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/9af0b27b/common/src/java/org/apache/hadoop/hive/common/ServerUtils.java -- diff --git a/common/src/java/org/apache/hadoop/hive/common/ServerUtils.java b/common/src/java/org/apache/hadoop/hive/common/ServerUtils.java index 83517ce..b44f92f 100644 --- a/common/src/java/org/apache/hadoop/hive/common/ServerUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/ServerUtils.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.common; +import java.net.InetAddress; +import java.net.UnknownHostException; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileSystem; @@ -47,4 +50,20 @@ public class ServerUtils { } } + /** + * Get the Inet address of the machine of the given host name. + * @param hostname The name of the host + * @return The network address of the the host + * @throws UnknownHostException + */ + public static InetAddress getHostAddress(String hostname) throws UnknownHostException { +InetAddress serverIPAddress; +if (hostname != null && !hostname.isEmpty()) { + serverIPAddress = InetAddress.getByName(hostname); +} else { + serverIPAddress = InetAddress.getLocalHost(); +} +return serverIPAddress; + } + } http://git-wip-us.apache.org/repos/asf/hive/blob/9af0b27b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 9e805bd..53ef428 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2387,6 +2387,11 @@ public class HiveConf extends Configuration { "Channel logging level for remote Spark driver. One of {DEBUG, ERROR, INFO, TRACE, WARN}."), SPARK_RPC_SASL_MECHANISM("hive.spark.client.rpc.sasl.mechanisms", "DIGEST-MD5", "Name of the SASL mechanism to use for authentication."), +SPARK_RPC_SERVER_ADDRESS("hive.spark.client.rpc.server.address", "", + "The server address of HiverServer2 host to be used for communication between Hive client and remote Spark driver. " + + "Default is empty, which means the address will be determined in the same way as for hive.server2.thrift.bind.host." 
+ + "This is only necessary if the host has mutiple network addresses and if a different network address other than " + + "hive.server2.thrift.bind.host is to be used."), SPARK_DYNAMIC_PARTITION_PRUNING( "hive.spark.dynamic.partition.pruning", false, "When dynamic pruning is enabled, joins on partition keys will be processed by writing\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/9af0b27b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java -- diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java index 8434965..d54f12c 100644 --- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java +++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java @@ -35,6 +35,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.common.ServerUtils; import org.apache.hive.service.AbstractService; import org.apache.hive.service.ServiceException; import org.apache.hive.service.ServiceUtils; @@ -160,21 +161,19 @@ public abstract class ThriftCLIService extends AbstractService implem
hive git commit: HIVE-12554: Fix Spark branch build after merge [Spark Branch] (Rui via Xuefu)
Repository: hive Updated Branches: refs/heads/spark 79035f1c5 -> 1a87bcc0f HIVE-12554: Fix Spark branch build after merge [Spark Branch] (Rui via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1a87bcc0 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1a87bcc0 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1a87bcc0 Branch: refs/heads/spark Commit: 1a87bcc0f27e5a819035ac67fd68ace4c41301e9 Parents: 79035f1 Author: Xuefu Zhang Authored: Tue Dec 1 10:49:04 2015 -0800 Committer: Xuefu Zhang Committed: Tue Dec 1 10:49:04 2015 -0800 -- .../apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java | 3 ++- ql/src/test/results/clientpositive/gen_udf_example_add10.q.out| 1 + .../test/results/clientpositive/spark/gen_udf_example_add10.q.out | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/1a87bcc0/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java index d215873..ec0fdea 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java @@ -27,6 +27,7 @@ import java.util.Properties; import java.util.Set; import org.apache.commons.compress.utils.CharsetNames; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; @@ -133,7 +134,7 @@ public class HiveSparkClientFactory { LOG.info(String.format( "load yarn property from hive configuration in %s mode (%s -> %s).", sparkMaster, propertyName, value)); - } else if (propertyName.equals(HiveConf.ConfVars.HADOOPFS.varname)) { + } else if (propertyName.equals(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY)) { String value = hiveConf.get(propertyName); if (value != null && !value.isEmpty()) { sparkConf.put("spark.hadoop." 
+ propertyName, value); http://git-wip-us.apache.org/repos/asf/hive/blob/1a87bcc0/ql/src/test/results/clientpositive/gen_udf_example_add10.q.out -- diff --git a/ql/src/test/results/clientpositive/gen_udf_example_add10.q.out b/ql/src/test/results/clientpositive/gen_udf_example_add10.q.out index 984554d..cab2ec8 100644 --- a/ql/src/test/results/clientpositive/gen_udf_example_add10.q.out +++ b/ql/src/test/results/clientpositive/gen_udf_example_add10.q.out @@ -43,6 +43,7 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: double) sort order: -+ Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE +TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double) http://git-wip-us.apache.org/repos/asf/hive/blob/1a87bcc0/ql/src/test/results/clientpositive/spark/gen_udf_example_add10.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/gen_udf_example_add10.q.out b/ql/src/test/results/clientpositive/spark/gen_udf_example_add10.q.out index 05ec1f5..493d0a4 100644 --- a/ql/src/test/results/clientpositive/spark/gen_udf_example_add10.q.out +++ b/ql/src/test/results/clientpositive/spark/gen_udf_example_add10.q.out @@ -48,6 +48,7 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: double) sort order: -+ Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Reducer 2 Reduce Operator Tree: Select Operator
hive git commit: HIVE-12512: Include driver logs in execution-level Operation logs (Mohit via Xuefu)
Repository: hive Updated Branches: refs/heads/master e9ca6870d -> be410d24f HIVE-12512: Include driver logs in execution-level Operation logs (Mohit via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/be410d24 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/be410d24 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/be410d24 Branch: refs/heads/master Commit: be410d24fe7e6598792b672d3fad950ed877a0b4 Parents: e9ca687 Author: Xuefu Zhang Authored: Mon Nov 30 21:40:50 2015 -0800 Committer: Xuefu Zhang Committed: Mon Nov 30 21:40:50 2015 -0800 -- .../service/cli/operation/TestOperationLoggingAPIWithMr.java | 7 --- .../service/cli/operation/TestOperationLoggingAPIWithTez.java | 6 +++--- .../apache/hive/service/cli/operation/LogDivertAppender.java | 3 ++- 3 files changed, 9 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/be410d24/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithMr.java -- diff --git a/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithMr.java b/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithMr.java index 0155b75..d21571e 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithMr.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithMr.java @@ -36,7 +36,7 @@ import org.junit.Test; * TestOperationLoggingAPIWithMr * Test the FetchResults of TFetchType.LOG in thrift level in MR mode. */ -public class TestOperationLoggingAPIWithMr extends OperationLoggingAPITestBase{ +public class TestOperationLoggingAPIWithMr extends OperationLoggingAPITestBase { @BeforeClass public static void setUpBeforeClass() throws Exception { @@ -45,10 +45,11 @@ public class TestOperationLoggingAPIWithMr extends OperationLoggingAPITestBase{ "Parsing command", "Parse Completed", "Starting Semantic Analysis", - "Semantic Analysis Completed", - "Starting command" }; expectedLogsExecution = new String[]{ + "Total jobs", + "Starting command", + "Semantic Analysis Completed", "Number of reduce tasks determined at compile time", "number of splits", "Submitting tokens for job", http://git-wip-us.apache.org/repos/asf/hive/blob/be410d24/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java -- diff --git a/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java b/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java index ab29861..3ffc3a4 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java @@ -20,11 +20,11 @@ public class TestOperationLoggingAPIWithTez extends OperationLoggingAPITestBase expectedLogsVerbose = new String[]{ "Parsing command", "Parse Completed", - "Starting Semantic Analysis", - "Semantic Analysis Completed", - "Starting command" + "Starting Semantic Analysis" }; expectedLogsExecution = new String[]{ + "Starting command", + "Semantic Analysis Completed", "Executing on YARN cluster with App id", "Setting Tez DAG access" }; 
http://git-wip-us.apache.org/repos/asf/hive/blob/be410d24/service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java -- diff --git a/service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java b/service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java index 9cb6439..7531778 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java +++ b/service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java @@ -23,6 +23,7 @@ import java.io.OutputStreamWriter; import java.io.Serializable; import java.util.regex.Pattern; +import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apa
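The Driver import added to LogDivertAppender is what lets HIVE-12512 divert Driver-category log lines into the per-operation log, which is why "Starting command" and "Semantic Analysis Completed" move from the verbose expectations into the execution expectations in both tests above. A hedged sketch of the HiveServer2 settings this feature sits behind (both predate this patch; shown with their stock values):

set hive.server2.logging.operation.enabled=true;
set hive.server2.logging.operation.level=EXECUTION;
-- at EXECUTION level, a client fetching operation logs now also receives
-- the Driver's compile/run messages alongside the task progress lines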
[2/3] hive git commit: HIVE-12184: DESCRIBE of fully qualified table fails when db and table name match and non-default database is in use (Naveen via Xuefu)
http://git-wip-us.apache.org/repos/asf/hive/blob/e9ca6870/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats.q.out -- diff --git a/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats.q.out b/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats.q.out index 6ae4f25..f3c10ee 100644 --- a/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats.q.out +++ b/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats.q.out @@ -199,55 +199,55 @@ POSTHOOK: Input: statsdb1@testpart1 POSTHOOK: Input: statsdb1@testpart1@part=part1 POSTHOOK: Input: statsdb1@testpart1@part=part2 A masked pattern was here -PREHOOK: query: describe formatted statsdb1.testpart1 col1 partition (part = 'part1') +PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 -POSTHOOK: query: describe formatted statsdb1.testpart1 col1 partition (part = 'part1') +POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment col1 int 27 484 0 8 from deserializer -PREHOOK: query: describe formatted statsdb1.testpart1 col2 partition (part = 'part1') +PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col2 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 -POSTHOOK: query: describe formatted statsdb1.testpart1 col2 partition (part = 'part1') +POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment col2 string 0 12 6.7 7 from deserializer -PREHOOK: query: describe formatted statsdb1.testpart1 col3 partition (part = 'part1') +PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col3 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 -POSTHOOK: query: describe formatted statsdb1.testpart1 col3 partition (part = 'part1') +POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col3 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment col3 string 0 1 4.0 4 from deserializer -PREHOOK: query: describe formatted statsdb1.testpart1 col1 partition (part = 'part2') +PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 -POSTHOOK: query: describe formatted statsdb1.testpart1 col1 partition (part = 'part2') +POSTHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part2') col1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: statsdb1@testpart1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment col1 int 27 484 0 18
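Every hunk in this golden file is the same mechanical rewrite: HIVE-12184 moves the PARTITION clause in front of the column name in DESCRIBE. A before/after sketch using the test's own statsdb1.testpart1 table:

-- old grammar, as the tests previously emitted it:
--   describe formatted statsdb1.testpart1 col1 partition (part = 'part1');
-- new grammar:
describe formatted statsdb1.testpart1 partition (part = 'part1') col1;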
[3/3] hive git commit: HIVE-12184: DESCRIBE of fully qualified table fails when db and table name match and non-default database is in use (Naveen via Xuefu)
HIVE-12184: DESCRIBE of fully qualified table fails when db and table name match and non-default database is in use (Naveen via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e9ca6870 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e9ca6870 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e9ca6870 Branch: refs/heads/master Commit: e9ca6870df889e03e8fa6888d7fbb51c4fbaf20a Parents: 3a17d42 Author: Xuefu Zhang Authored: Mon Nov 30 21:37:11 2015 -0800 Committer: Xuefu Zhang Committed: Mon Nov 30 21:37:11 2015 -0800 -- .../hive/ql/parse/DDLSemanticAnalyzer.java | 226 --- .../apache/hadoop/hive/ql/parse/HiveParser.g| 23 +- .../test/queries/clientnegative/desc_failure4.q | 5 + .../queries/clientnegative/describe_xpath1.q| 2 +- .../queries/clientnegative/describe_xpath2.q| 2 +- .../queries/clientnegative/describe_xpath3.q| 2 +- .../queries/clientnegative/describe_xpath4.q| 2 +- .../alter_partition_update_status.q | 12 +- .../alter_table_invalidate_column_stats.q | 74 ++-- .../clientpositive/alter_table_update_status.q | 10 +- .../queries/clientpositive/analyze_tbl_part.q | 8 +- .../queries/clientpositive/colstats_all_nulls.q | 4 +- .../clientpositive/columnstats_part_coltype.q | 42 +- .../clientpositive/columnstats_partlvl.q| 12 +- .../clientpositive/columnstats_partlvl_dp.q | 20 +- .../queries/clientpositive/compustat_avro.q | 4 +- .../clientpositive/confirm_initial_tbl_stats.q | 22 +- .../queries/clientpositive/describe_syntax.q| 10 +- .../queries/clientpositive/describe_table.q | 64 ++- .../queries/clientpositive/describe_xpath.q | 12 +- .../extrapolate_part_stats_full.q | 2 +- .../extrapolate_part_stats_partial.q| 4 +- .../extrapolate_part_stats_partial_ndv.q| 44 +-- .../clientpositive/partition_coltype_literals.q | 4 +- .../queries/clientpositive/stats_only_null.q| 2 +- .../results/clientnegative/desc_failure3.q.out | 2 +- .../results/clientnegative/desc_failure4.q.out | 21 + .../clientnegative/describe_xpath1.q.out| 2 +- .../clientnegative/describe_xpath2.q.out| 2 +- .../clientnegative/describe_xpath3.q.out| 2 +- .../clientnegative/describe_xpath4.q.out| 2 +- .../clientnegative/drop_database_cascade.q.out | 2 +- .../alter_partition_update_status.q.out | 20 +- .../alter_table_invalidate_column_stats.q.out | 144 +++ .../alter_table_update_status.q.out | 20 +- .../results/clientpositive/ambiguitycheck.q.out | 4 +- .../clientpositive/analyze_tbl_part.q.out | 12 +- .../clientpositive/colstats_all_nulls.q.out | 8 +- .../columnstats_part_coltype.q.out | 84 ++-- .../clientpositive/columnstats_partlvl.q.out| 24 +- .../clientpositive/columnstats_partlvl_dp.q.out | 40 +- .../results/clientpositive/compustat_avro.q.out | 8 +- .../confirm_initial_tbl_stats.q.out | 44 +-- .../clientpositive/describe_syntax.q.out| 20 +- .../results/clientpositive/describe_table.q.out | 390 ++- .../results/clientpositive/describe_xpath.q.out | 24 +- .../extrapolate_part_stats_full.q.out | 4 +- .../extrapolate_part_stats_partial.q.out| 8 +- .../extrapolate_part_stats_partial_ndv.q.out| 88 ++--- .../clientpositive/llap/stats_only_null.q.out | 4 +- .../partition_coltype_literals.q.out| 8 +- .../clientpositive/spark/stats_only_null.q.out | 4 +- .../clientpositive/stats_only_null.q.out| 4 +- .../clientpositive/tez/stats_only_null.q.out| 4 +- 54 files changed, 999 insertions(+), 612 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/e9ca6870/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java -- diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index eea2fcc..757542d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -1717,158 +1717,65 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer { } } -// assume the first component of DOT delimited name is tableName -// get the attemptTableName -static public String getAttemptTableName(Hive db, String qualifiedName, boolean isColumn) -throws SemanticException { - // check whether the name starts with table - // DESCRIBE table - // DESCRIBE table.column - // DESCRIBE table
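The removed block implemented the old guess-based resolution of DOT-delimited names in DESCRIBE; the patch shrinks it and shifts the disambiguation into HiveParser.g. A hedged sketch of the failure case named in the commit title, with hypothetical identifiers (a database and table that share a name, used from a non-default database):

create database db1;
create table db1.db1 (key int, value string);
use db1;
describe db1.db1; -- previously could mis-resolve; now describes table db1 in database db1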
[1/3] hive git commit: HIVE-12184: DESCRIBE of fully qualified table fails when db and table name match and non-default database is in use (Naveen via Xuefu)
Repository: hive Updated Branches: refs/heads/master 3a17d4230 -> e9ca6870d http://git-wip-us.apache.org/repos/asf/hive/blob/e9ca6870/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out -- diff --git a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out index 3ef6bc0..f0d8ff2 100644 --- a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out +++ b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out @@ -8,10 +8,10 @@ key string default value string default A masked pattern was here -PREHOOK: query: describe formatted src.key +PREHOOK: query: describe formatted src key PREHOOK: type: DESCTABLE PREHOOK: Input: default@src -POSTHOOK: query: describe formatted src.key +POSTHOOK: query: describe formatted src key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment @@ -27,10 +27,10 @@ key string default value string default A masked pattern was here -PREHOOK: query: describe formatted src1.value +PREHOOK: query: describe formatted src1 value PREHOOK: type: DESCTABLE PREHOOK: Input: default@src1 -POSTHOOK: query: describe formatted src1.value +POSTHOOK: query: describe formatted src1 value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment @@ -45,10 +45,10 @@ POSTHOOK: Input: default@src_json json string default A masked pattern was here -PREHOOK: query: describe formatted src_json.json +PREHOOK: query: describe formatted src_json json PREHOOK: type: DESCTABLE PREHOOK: Input: default@src_json -POSTHOOK: query: describe formatted src_json.json +POSTHOOK: query: describe formatted src_json json POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_json # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment @@ -64,10 +64,10 @@ key string default value string default A masked pattern was here -PREHOOK: query: describe formatted src_sequencefile.value +PREHOOK: query: describe formatted src_sequencefile value PREHOOK: type: DESCTABLE PREHOOK: Input: default@src_sequencefile -POSTHOOK: query: describe formatted src_sequencefile.value +POSTHOOK: query: describe formatted src_sequencefile value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src_sequencefile # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment @@ -83,10 +83,10 @@ key int value string A masked pattern was here -PREHOOK: query: describe formatted srcbucket.value +PREHOOK: query: describe formatted srcbucket value PREHOOK: type: DESCTABLE PREHOOK: Input: default@srcbucket -POSTHOOK: query: describe formatted srcbucket.value +POSTHOOK: query: describe formatted srcbucket value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcbucket # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment @@ -102,10 +102,10 @@ key int value string A masked pattern was here -PREHOOK: query: describe formatted srcbucket2.value +PREHOOK: query: describe formatted srcbucket2 value PREHOOK: type: DESCTABLE PREHOOK: Input: default@srcbucket2 -POSTHOOK: query: describe formatted srcbucket2.value +POSTHOOK: query: describe formatted srcbucket2 value POSTHOOK: type: DESCTABLE POSTHOOK: I
[02/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out b/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out index 0f9d1ae..7ac16d0 100644 --- a/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out +++ b/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out @@ -109,14 +109,18 @@ STAGE PLANS: alias: a Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator -key expressions: key (type: int) -sort order: + -Map-reduce partition columns: key (type: int) + Select Operator +expressions: key (type: int), value (type: int) +outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE -tag: 0 -value expressions: value (type: int) -auto parallelism: false +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: int) + auto parallelism: false Path -> Alias: A masked pattern was here Path -> Partition: @@ -169,21 +173,25 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (value = 50) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) +Select Operator + expressions: key (type: int), 50 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: int) - auto parallelism: false + Reduce Output Operator +key expressions: _col0 (type: int) +sort order: + +Map-reduce partition columns: _col0 (type: int) +Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE +tag: 1 +value expressions: _col1 (type: int) +auto parallelism: false Path -> Alias: A masked pattern was here Path -> Partition: @@ -232,25 +240,29 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [b] + /a [a] Map 4 Map Operator Tree: TableScan - alias: c + alias: a Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (value = 60) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) +Select Operator + expressions: key (type: int), 60 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - tag: 2 - value expressions: value (type: int) - auto parallelism: false + Reduce Output Operator +key expressions: _col0 (type: int) +
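The reshaped Spark plan above is the visible effect of HIVE-12017: single-join queries now go through CBO as well, so table scans gain explicit Select Operators and columns travel as internal _colN aliases (note aliases b and c both becoming a, since join_filters_overlap self-joins one table). A hedged sketch of the general shape, with a hypothetical table a(key, value):

set hive.cbo.enable=true; -- after HIVE-12017, no longer bypassed for queries with <= 1 join
explain
select x.key, y.value
from a x join a y on x.key = y.key and y.value = 50;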
[66/91] [abbrv] hive git commit: HIVE-12331 : Remove hive.enforce.bucketing & hive.enforce.sorting configs (Ashutosh Chauhan via Jason Dere)
http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_all_non_partitioned.q -- diff --git a/ql/src/test/queries/clientpositive/delete_all_non_partitioned.q b/ql/src/test/queries/clientpositive/delete_all_non_partitioned.q index 9110dcc..82c18e2 100644 --- a/ql/src/test/queries/clientpositive/delete_all_non_partitioned.q +++ b/ql/src/test/queries/clientpositive/delete_all_non_partitioned.q @@ -1,6 +1,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set hive.enforce.bucketing=true; + create table acid_danp(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_all_partitioned.q -- diff --git a/ql/src/test/queries/clientpositive/delete_all_partitioned.q b/ql/src/test/queries/clientpositive/delete_all_partitioned.q index f082b6d..122b3e2 100644 --- a/ql/src/test/queries/clientpositive/delete_all_partitioned.q +++ b/ql/src/test/queries/clientpositive/delete_all_partitioned.q @@ -1,6 +1,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set hive.enforce.bucketing=true; + create table acid_dap(a int, b varchar(128)) partitioned by (ds string) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_orig_table.q -- diff --git a/ql/src/test/queries/clientpositive/delete_orig_table.q b/ql/src/test/queries/clientpositive/delete_orig_table.q index fd23f4b..88cc830 100644 --- a/ql/src/test/queries/clientpositive/delete_orig_table.q +++ b/ql/src/test/queries/clientpositive/delete_orig_table.q @@ -1,6 +1,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set hive.enforce.bucketing=true; + dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/delete_orig_table; dfs -copyFromLocal ../../data/files/alltypesorc ${system:test.tmp.dir}/delete_orig_table/0_0; http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_tmp_table.q -- diff --git a/ql/src/test/queries/clientpositive/delete_tmp_table.q b/ql/src/test/queries/clientpositive/delete_tmp_table.q index eb6c095..c7d8aa6 100644 --- a/ql/src/test/queries/clientpositive/delete_tmp_table.q +++ b/ql/src/test/queries/clientpositive/delete_tmp_table.q @@ -1,6 +1,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set hive.enforce.bucketing=true; + create temporary table acid_dtt(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_where_no_match.q -- diff --git a/ql/src/test/queries/clientpositive/delete_where_no_match.q b/ql/src/test/queries/clientpositive/delete_where_no_match.q index 8ed979d..f13dd73 100644 --- a/ql/src/test/queries/clientpositive/delete_where_no_match.q +++ b/ql/src/test/queries/clientpositive/delete_where_no_match.q @@ -1,6 +1,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set hive.enforce.bucketing=true; + create table acid_dwnm(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES 
('transactional'='true'); http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_where_non_partitioned.q -- diff --git a/ql/src/test/queries/clientpositive/delete_where_non_partitioned.q b/ql/src/test/queries/clientpositive/delete_where_non_partitioned.q index dac5375..de1ca36 100644 --- a/ql/src/test/queries/clientpositive/delete_where_non_partitioned.q +++ b/ql/src/test/queries/clientpositive/delete_where_non_partitioned.q @@ -1,6 +1,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set hive.enforce.bucketing=true; + create table acid_dwnp(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/queries/clientpositive/delete_where_partitioned.q -- diff --git a/ql/src/test/queries/clientpo
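Each of these delete_*.q files loses its set hive.enforce.bucketing=true; line because HIVE-12331 removes the config entirely: from this commit on, bucketing and sort order are always enforced on insert. The remaining ACID setup, taken directly from the updated delete_all_non_partitioned.q:

set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-- hive.enforce.bucketing no longer exists and is no longer needed
create table acid_danp(a int, b varchar(128)) clustered by (a) into 2 buckets
stored as orc TBLPROPERTIES ('transactional'='true');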
[07/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out index bcef03c..d8ade07 100644 --- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out @@ -113,10 +113,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE -Spark HashTable Sink Operator - keys: -0 key (type: int) -1 key (type: int) +Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator +keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -134,26 +138,30 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 42 Data size: 368 Basic stats: COMPLETE Column stats: NONE -Map Join Operator - condition map: - Inner Join 0 to 1 - keys: -0 key (type: int) -1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: -0 Map 1 - Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE - Select Operator -expressions: _col0 (type: int), concat(_col1, _col7) (type: string) -outputColumnNames: _col0, _col1 +Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Map Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 _col0 (type: int) + 1 _col0 (type: int) +outputColumnNames: _col0, _col1, _col4 +input vertices: + 0 Map 1 Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) +Select Operator + expressions: _col0 (type: int), concat(_col1, _col4) (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator +key expressions: _col0 (type: int) +sort order: + +Map-reduce partition columns: _col0 (type: int) +Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE +value expressions: _col1 (type: string) Local Work: Map Reduce Local Work Reducer 3 @@ -285,10 +293,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE -Spark HashTable Sink Operator - keys: -0 key (type: int) -1 key (type: int) +Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator +keys
[26/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out index 36a032a..3b634be 100644 --- a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out @@ -788,38 +788,46 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE -Map Join Operator - condition map: - Inner Join 0 to 1 - keys: -0 key (type: string) -1 key (type: string) - input vertices: -1 Map 3 - Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE - Group By Operator -aggregations: count() -mode: hash -outputColumnNames: _col0 -Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - sort order: +Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Map Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 _col0 (type: string) + 1 _col0 (type: string) +input vertices: + 1 Map 3 +Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE +Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator +sort order: +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +value expressions: _col0 (type: bigint) Execution mode: llap Map 3 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) +Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE Execution mode: llap Reducer 2 Execution mode: uber @@ -900,39 +908,47 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE -Map Join Operator - condition map: - Inner Join 0 to 1 - keys: -0 key (type: string) -1 key (type: string) - input vertices: -1 Map 3 - Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator -
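The "HybridGraceHashJoin: true" tag in these LLAP plans marks a map join that can spill hash-table partitions to disk when the small side outgrows memory. A hedged sketch of the switch and a query of the test's shape (the self-join on alias p1/p2 mirrors the diff; the table name is illustrative and the config predates this patch):

set hive.mapjoin.hybridgrace.hashtable=true;
explain
select count(*) from part_table p1 join part_table p2 on p1.key = p2.key;
-- plans eligible for the spilling variant are tagged: HybridGraceHashJoin: true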
[78/91] [abbrv] hive git commit: HIVE-8396 : Hive CliDriver command splitting can be broken when comments are present (Elliot West, reviewed by Sergey Shelukhin)
HIVE-8396 : Hive CliDriver command splitting can be broken when comments are present (Elliot West, reviewed by Sergey Shelukhin)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0ae374a3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0ae374a3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0ae374a3

Branch: refs/heads/spark
Commit: 0ae374a320d1cae523ba2b434800e97692507db8
Parents: 454c2ca
Author: Sergey Shelukhin
Authored: Wed Nov 25 15:13:27 2015 -0800
Committer: Sergey Shelukhin
Committed: Wed Nov 25 15:13:27 2015 -0800

--
 cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java | 3 +++
 1 file changed, 3 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/hive/blob/0ae374a3/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
--
diff --git a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
index e04f247..e77b7f1 100644
--- a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
+++ b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
@@ -770,6 +770,9 @@ public class CliDriver {
       if (!prefix.equals("")) {
         prefix += '\n';
       }
+      if (line.trim().startsWith("--")) {
+        continue;
+      }
       if (line.trim().endsWith(";") && !line.trim().endsWith("\\;")) {
         line = prefix + line;
         ret = cli.processLine(line, true);
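The three inserted lines make the CLI drop whole-line comments while it accumulates a multi-line command, instead of folding them into the buffered statement. A sketch of a script that exercises the fix (file contents are hypothetical):

SELECT key,
-- a full-line comment in the middle of a statement used to break splitting
value
FROM src;

-- after the patch the comment line is skipped, and the statement still
-- terminates correctly on the unescaped ';'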
[39/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/correlationoptimizer5.q.out -- diff --git a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out index 7f2e19f..d33ca0f 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out @@ -125,29 +125,37 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: int) -sort order: + -Map-reduce partition columns: key (type: int) + Select Operator +expressions: key (type: int) +outputColumnNames: _col0 Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE TableScan alias: y Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 724 Data size: 2897 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: int) -sort order: + -Map-reduce partition columns: key (type: int) + Select Operator +expressions: key (type: int) +outputColumnNames: _col0 Statistics: Num rows: 724 Data size: 2897 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 724 Data size: 2897 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: -0 key (type: int) -1 key (type: int) +0 _col0 (type: int) +1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -180,10 +188,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3 + outputColumnNames: _col0, _col2 Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: _col0 (type: int), _col3 (type: string) +expressions: _col0 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -217,34 +225,42 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: int) -sort order: + -Map-reduce partition columns: key (type: int) + Select Operator +expressions: key (type: int) +outputColumnNames: _col0 Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE TableScan alias: n Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: 
key (type: int) -sort order: + -Map-reduce partition columns: key (type: int) + Select Operator +expressions: key (type: int), val
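correlationoptimizer5.q guards the correlation optimizer, which merges adjacent shuffle stages that repartition on the same key; the hunks above only restate its joins over _colN aliases after CBO. A hedged reminder of the knob, with a query of the canonical correlated shape (identifiers illustrative):

set hive.optimize.correlation=true;
explain
select x.key, count(1)
from x join y on x.key = y.key
group by x.key; -- join and aggregation share the key, so one shuffle can serve both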
[19/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/ppd_join5.q.out -- diff --git a/ql/src/test/results/clientpositive/ppd_join5.q.out b/ql/src/test/results/clientpositive/ppd_join5.q.out index 0807559..1b46ed5 100644 --- a/ql/src/test/results/clientpositive/ppd_join5.q.out +++ b/ql/src/test/results/clientpositive/ppd_join5.q.out @@ -32,7 +32,7 @@ POSTHOOK: Lineage: t1.id1 SIMPLE [] POSTHOOK: Lineage: t1.id2 SIMPLE [] POSTHOOK: Lineage: t2.d SIMPLE [] POSTHOOK: Lineage: t2.id SIMPLE [] -Warning: Shuffle Join JOIN[10][tables = [a, b, c]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select a.*,b.d d1,c.d d2 from t1 a join t2 b on (a.id1 = b.id) @@ -58,31 +58,39 @@ STAGE PLANS: Filter Operator predicate: (id1 is not null and id2 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: id1 (type: string), id2 (type: string) -sort order: ++ -Map-reduce partition columns: id1 (type: string), id2 (type: string) + Select Operator +expressions: id1 (type: string), id2 (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE TableScan alias: b Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (id is not null and (d <= 1)) (type: boolean) + predicate: ((d <= 1) and id is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: id (type: string), id (type: string) -sort order: ++ -Map-reduce partition columns: id (type: string), id (type: string) + Select Operator +expressions: id (type: string), d (type: int) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE -value expressions: d (type: int) +Reduce Output Operator + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col0 (type: string) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: -0 id1 (type: string), id2 (type: string) -1 id (type: string), id (type: string) - outputColumnNames: _col0, _col1, _col6 +0 _col0 (type: string), _col1 (type: string) +1 _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -98,17 +106,21 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int) + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int) TableScan -alias: c +alias: b Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d <= 1) (type: boolean) Statistics: Num rows: 
1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -sort order: + Select Operator +expressions: d (type: int) +outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE -value expressions: d (type: int) +Reduce Output Operator + sort o
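The warning at the head of this diff is Hive's cross-product check: a join stage whose Reduce Output Operator has an empty sort order (no equality keys, values only, as in the second stage above) is flagged at compile time. Only the operator id changes here, because CBO renumbers operators. A minimal sketch of a query shape that earns the same warning, with hypothetical tables:

explain
select * from t1 a join t2 b on a.key = b.key join t3 c; -- the join with c has no keys
-- Warning: Shuffle Join JOIN[...] in Stage 'Stage-2:MAPRED' is a cross product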
[49/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out -- diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out index f1aadef..85a685b 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out @@ -157,23 +157,27 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: string) - 1 key (type: string) -Position of Big Table: 1 -BucketMapJoin: true -Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator -sort order: -tag: -1 -value expressions: _col0 (type: bigint) -auto parallelism: false + Select Operator +expressions: key (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE +Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: string) +1 _col0 (type: string) + Position of Big Table: 1 + BucketMapJoin: true + Group By Operator +aggregations: count() +mode: hash +outputColumnNames: _col0 +Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: A masked pattern was here Path -> Partition: @@ -274,8 +278,8 @@ STAGE PLANS: name: default.bucket_big name: default.bucket_big Truncated Path -> Alias: -/bucket_big/ds=2008-04-08 [b] -/bucket_big/ds=2008-04-09 [b] +/bucket_big/ds=2008-04-08 [$hdt$_1:b] +/bucket_big/ds=2008-04-09 [$hdt$_1:b] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -379,23 +383,27 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: string) - 1 key (type: string) -Position of Big Table: 0 -BucketMapJoin: true -Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator -sort order: -tag: -1 -value expressions: _col0 (type: bigint) -auto parallelism: false + Select Operator +expressions: key (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE +Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: string) +1 _col0 (type: string) + Position of Big Table: 0 + BucketMapJoin: true + Group By Operator +aggregations: count() +mode: hash +outputColumnNames: _col0 +Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: A masked pattern was here Path -> Partition: @@ -496,8 +504,8 @@ STAGE PLANS: name: default.bucket_big name: default.bucket_big Truncated Path -> Alias: -/bucket_big/ds=2008-04-08 [a] -/bucket_big/ds=2008-04-09 [a] +/bucket_big/ds=2008-04-08 [$hdt$_0:a] +/bu
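Both orderings above keep the Sorted Merge Bucket Map Join, now reading from _col0 projections. A hedged recap of the settings the auto_sortmerge_join tests run under (all three configs are long-standing; the query mirrors the test):

set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin=true;
set hive.optimize.bucketmapjoin.sortedmerge=true;
explain
select count(*) from bucket_small a join bucket_big b on a.key = b.key;
-- both tables must be bucketed and sorted on key for the SMB operator to appear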
[65/91] [abbrv] hive git commit: HIVE-12331 : Remove hive.enforce.bucketing & hive.enforce.sorting configs (Ashutosh Chauhan via Jason Dere)
http://git-wip-us.apache.org/repos/asf/hive/blob/5562fae7/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out -- diff --git a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out deleted file mode 100644 index b1dfd7c..000 --- a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out +++ /dev/null @@ -1,553 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) -create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) -create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tst1 -PREHOOK: query: alter table tst1 clustered by (key) into 8 buckets -PREHOOK: type: ALTERTABLE_CLUSTER_SORT -PREHOOK: Input: default@tst1 -PREHOOK: Output: default@tst1 -POSTHOOK: query: alter table tst1 clustered by (key) into 8 buckets -POSTHOOK: type: ALTERTABLE_CLUSTER_SORT -POSTHOOK: Input: default@tst1 -POSTHOOK: Output: default@tst1 -PREHOOK: query: describe formatted tst1 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@tst1 -POSTHOOK: query: describe formatted tst1 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@tst1 -# col_name data_type comment - -keystring -value string - -# Partition Information -# col_name data_type comment - -ds string - -# Detailed Table Information -Database: default - A masked pattern was here -Retention: 0 - A masked pattern was here -Table Type:MANAGED_TABLE -Table Parameters: - A masked pattern was here - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed:No -Num Buckets: 8 -Bucket Columns:[key] -Sort Columns: [] -Storage Desc Params: - serialization.format1 -PREHOOK: query: insert overwrite table tst1 partition (ds='1') select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@tst1@ds=1 -POSTHOOK: query: insert overwrite table tst1 partition (ds='1') select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@tst1@ds=1 -POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: describe formatted tst1 partition (ds = '1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@tst1 -POSTHOOK: query: describe formatted tst1 partition (ds = '1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@tst1 -# col_name data_type comment - -keystring -value string - -# Partition Information -# col_name data_type comment - -ds string - -# Detailed Partition Information -Partition Value: [1] -Database: default -Table: tst1 - A masked pattern was here -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles1 - numRows 500 - rawDataSize 5312 - totalSize 5812 - A masked pattern was here - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apa
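The deleted golden file covered re-bucketing via ALTER TABLE; that DDL survives HIVE-12331 unchanged (only the enforce configs are gone), so the statements it exercised remain the reference for changing bucket counts:

create table tst1(key string, value string)
partitioned by (ds string) clustered by (key) into 10 buckets;
alter table tst1 clustered by (key) into 8 buckets;
describe formatted tst1; -- Num Buckets: 8
insert overwrite table tst1 partition (ds='1') select key, value from src;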
[24/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index e9192a3..10b4168 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -210,41 +210,49 @@ STAGE PLANS: alias: srcpart filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: ds (type: string) -sort order: + -Map-reduce partition columns: ds (type: string) + Select Operator +expressions: ds (type: string) +outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: llap Map 4 Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE Filter Operator -predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) +predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: ds (type: string) - sort order: + - Map-reduce partition columns: ds (type: string) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Group By Operator -keys: _col0 (type: string) -mode: hash + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE -Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds +Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Target column: ds - Target Vertex: Map 1 + Dynamic Partitioning Event Operator +Target Input: srcpart +Partition key expr: ds +Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE +Target column: ds +Target Vertex: Map 1 Execution mode: vectorized, llap Reducer 2 Execution mode: llap @@ -253,8 +261,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 ds (type: string) - 1 ds (type: string) + 0 _col0 (type: string) + 1 _col0 (type: string) Statistics: Num rows: 2200 Data size: 233
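Here srcpart_date acts as the pruning side: its Dynamic Partitioning Event Operator ships the surviving ds values to Map 1 at runtime, so only matching srcpart partitions are scanned, and the pipeline stays vectorized. A hedged sketch of the switches plus the query shape visible in the filterExpr above:

set hive.tez.dynamic.partition.pruning=true;
set hive.vectorized.execution.enabled=true;
explain
select count(*)
from srcpart join srcpart_date on srcpart.ds = srcpart_date.ds
where srcpart_date.`date` = '2008-04-08';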
[31/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out index 1f1bf3d..814c947 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out @@ -135,12 +135,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) +Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator +key expressions: _col0 (type: int) +sort order: + +Map-reduce partition columns: _col0 (type: int) +Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE +value expressions: _col1 (type: string) Execution mode: llap Map 2 Map Operator Tree: @@ -150,28 +154,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE -Map Join Operator - condition map: - Inner Join 0 to 1 - keys: -0 key (type: int) -1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: -0 Map 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator -expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) -outputColumnNames: _col0, _col1, _col2 +Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 _col0 (type: int) + 1 _col0 (type: int) +outputColumnNames: _col0, _col1, _col3 +input vertices: + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false +HybridGraceHashJoin: true +Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator +compressed: false +Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +table: +input format: org.apache.hadoop.mapred.TextInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap Stage: Stage-0 @@ -229,11 +237,15 @@ STAGE PLANS: Filter Operator predicate: key is
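bucket_map_join_tez1.q covers the Tez bucket map join, where each task loads only the matching bucket of the small side instead of the whole table. A hedged sketch of the knob (it exists in this era and is off by default; table names are illustrative):

set hive.convert.join.bucket.mapjoin.tez=true;
explain
select a.key, a.value, b.value
from tab a join tab_part b on a.key = b.key;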
[45/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out -- diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out index bbfa756..31a1b29 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out @@ -68,19 +68,23 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: int) - 1 key (type: int) -Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator -sort order: -value expressions: _col0 (type: bigint) + Select Operator +expressions: key (type: int) +outputColumnNames: _col0 +Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE +Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: int) +1 _col0 (type: int) + Group By Operator +aggregations: count() +mode: hash +outputColumnNames: _col0 +Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -144,23 +148,27 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: int) - 1 key (type: int) + Select Operator +expressions: key (type: int) outputColumnNames: _col0 -Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator -key expressions: _col0 (type: int) -sort order: + -Map-reduce partition columns: _col0 (type: int) -value expressions: _col1 (type: bigint) +Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE +Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: int) +1 _col0 (type: int) + outputColumnNames: _col0 + Group By Operator +aggregations: count() +keys: _col0 (type: int) +mode: hash +outputColumnNames: _col0, _col1 +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -241,29 +249,30 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: int) - 1 key (type: int) + Select Operator +expressions: key (type: int) outputColumnNames: _col0 -Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator -key expressions: _col0 (type: int) -sort order: + -Map-reduce partition co
[38/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/cross_product_check_2.q.out -- diff --git a/ql/src/test/results/clientpositive/cross_product_check_2.q.out b/ql/src/test/results/clientpositive/cross_product_check_2.q.out index 6910b40..57d1498 100644 --- a/ql/src/test/results/clientpositive/cross_product_check_2.q.out +++ b/ql/src/test/results/clientpositive/cross_product_check_2.q.out @@ -93,7 +93,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-5:MAPRED' is a cross product +Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-5:MAPRED' is a cross product PREHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A PREHOOK: type: QUERY POSTHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A @@ -107,60 +107,68 @@ STAGE PLANS: Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: -a +$hdt$_0:d1 Fetch Operator limit: -1 -d1 +$hdt$_2:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: -a - TableScan -alias: a -Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -HashTable Sink Operator - keys: -0 -1 -d1 +$hdt$_0:d1 TableScan alias: d1 Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: key (type: string), value (type: string) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 5 Data size: 48 Basic stats: COMPLETE Column stats: NONE +HashTable Sink Operator + keys: +0 _col0 (type: string) +1 _col0 (type: string) +$hdt$_2:a + TableScan +alias: a +Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 key (type: string) - 1 key (type: string) + 0 + 1 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan -alias: d2 +alias: d1 Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 48 Basic stats: COMPLETE Column stats: NONE - Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: string) - 1 key (type: string) -outputColumnNames: _col0, _col1, _col5, _col6 -Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: key (type: string), value (type: string) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 5 Data size: 48 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: -0 -1 - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator -expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) +0 _col0 (type: string) +1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Map Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 + 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statis
[68/91] [abbrv] hive git commit: HIVE-12329: Turn on limit pushdown optimization by default (Ashutosh Chauhan via Prasanth J)
http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out -- diff --git a/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out b/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out index 12920d2..bcbdf06 100644 --- a/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out +++ b/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out @@ -161,6 +161,7 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) Reducer 3 Execution mode: vectorized http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/tez/vector_char_2.q.out -- diff --git a/ql/src/test/results/clientpositive/tez/vector_char_2.q.out b/ql/src/test/results/clientpositive/tez/vector_char_2.q.out index 8545608..f88ee91 100644 --- a/ql/src/test/results/clientpositive/tez/vector_char_2.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_char_2.q.out @@ -106,6 +106,7 @@ STAGE PLANS: key expressions: _col0 (type: char(20)) sort order: + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: vectorized @@ -234,6 +235,7 @@ STAGE PLANS: key expressions: _col0 (type: char(20)) sort order: - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: vectorized http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/tez/vector_char_simple.q.out -- diff --git a/ql/src/test/results/clientpositive/tez/vector_char_simple.q.out b/ql/src/test/results/clientpositive/tez/vector_char_simple.q.out index be38775..617620c 100644 --- a/ql/src/test/results/clientpositive/tez/vector_char_simple.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_char_simple.q.out @@ -79,6 +79,7 @@ STAGE PLANS: key expressions: _col0 (type: char(10)) sort order: + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: char(20)) Execution mode: vectorized Reducer 2 @@ -179,6 +180,7 @@ STAGE PLANS: key expressions: _col0 (type: char(10)) sort order: - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: char(20)) Execution mode: vectorized Reducer 2 @@ -282,6 +284,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE +TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) Execution mode: vectorized Reducer 2 http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out -- diff --git a/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out b/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out index c492113..1142485 100644 --- a/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out @@ -41,6 +41,7 @@ STAGE PLANS: key expressions: null 
(type: double), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: smallint), _col5 (type: string) sort order: ++ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE +TopN Hash Memory Usage: 0.1 Execution mode: vector
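Each "TopN Hash Memory Usage: 0.1" line marks a Reduce Output Operator that now keeps only the top rows in a bounded hash before shuffling, which is the behavior this patch turns on by default. A sketch of the query shape that benefits, assuming the src test table; the 0.1 fraction corresponds to the hive.limit.pushdown.memory.usage setting:

    set hive.limit.pushdown.memory.usage=0.1;
    select key, count(*) as cnt from src group by key order by key limit 10;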
[84/91] [abbrv] hive git commit: HIVE-12465: Hive might produce wrong results when (outer) joins are merged (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
HIVE-12465: Hive might produce wrong results when (outer) joins are merged (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/79847387 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/79847387 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/79847387 Branch: refs/heads/spark Commit: 79847387699b803506ecd8b03ecc8790ee229751 Parents: f1ac5a3 Author: Jesus Camacho Rodriguez Authored: Tue Nov 24 17:20:05 2015 +0100 Committer: Jesus Camacho Rodriguez Committed: Fri Nov 27 10:10:46 2015 +0100 -- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +- ql/src/test/queries/clientpositive/mergejoin.q | 12 + .../test/results/clientpositive/mergejoin.q.out | 548 +++ .../results/clientpositive/tez/mergejoin.q.out | 548 +++ 4 files changed, 1109 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/79847387/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 1b7873d..0ff6001 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -2265,7 +2265,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { if (rightCondAl1.size() != 0) { QBJoinTree leftTree = joinTree.getJoinSrc(); List leftTreeLeftSrc = new ArrayList(); -if (leftTree != null) { +if (leftTree != null && leftTree.getNoOuterJoin()) { String leftTreeRightSource = leftTree.getRightAliases() != null && leftTree.getRightAliases().length > 0 ? leftTree.getRightAliases()[0] : null; http://git-wip-us.apache.org/repos/asf/hive/blob/79847387/ql/src/test/queries/clientpositive/mergejoin.q -- diff --git a/ql/src/test/queries/clientpositive/mergejoin.q b/ql/src/test/queries/clientpositive/mergejoin.q index 6cd3929..82e1c93 100644 --- a/ql/src/test/queries/clientpositive/mergejoin.q +++ b/ql/src/test/queries/clientpositive/mergejoin.q @@ -132,3 +132,15 @@ select * from (select * from tab where tab.key = 0)a join (select * from tab_part where tab_part.key = 98)b on a.key = b.key full outer join tab_part c on b.key = c.key; + +set hive.cbo.enable = false; + +select * from +(select * from tab where tab.key = 0)a +full outer join +(select * from tab_part where tab_part.key = 98)b join tab_part c on a.key = b.key and b.key = c.key; + +select * from +(select * from tab where tab.key = 0)a +join +(select * from tab_part where tab_part.key = 98)b full outer join tab_part c on a.key = b.key and b.key = c.key; http://git-wip-us.apache.org/repos/asf/hive/blob/79847387/ql/src/test/results/clientpositive/mergejoin.q.out -- diff --git a/ql/src/test/results/clientpositive/mergejoin.q.out b/ql/src/test/results/clientpositive/mergejoin.q.out index 65f5ef5..e4a9e5b 100644 --- a/ql/src/test/results/clientpositive/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/mergejoin.q.out @@ -3787,3 +3787,551 @@ NULLNULLNULLNULLNULLNULL97 val_97 2008-04-08 NULL NULLNULLNULLNULLNULL97 val_97 2008-04-08 NULL NULLNULLNULLNULLNULL98 val_98 2008-04-08 NULL NULLNULLNULLNULLNULL98 val_98 2008-04-08 +Warning: Shuffle Join JOIN[9][tables = [a, b]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: select * from +(select * from tab where tab.key = 0)a +full outer join +(select * from tab_part where tab_part.key = 98)b join tab_part c on a.key = b.key 
and b.key = c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tab +PREHOOK: Input: default@tab@ds=2008-04-08 +PREHOOK: Input: default@tab_part +PREHOOK: Input: default@tab_part@ds=2008-04-08 + A masked pattern was here +POSTHOOK: query: select * from +(select * from tab where tab.key = 0)a +full outer join +(select * from tab_part where tab_part.key = 98)b join tab_part c on a.key = b.key and b.key = c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab +POSTHOOK: Input: default@tab@ds=2008-04-08 +POSTHOOK: Input: default@tab_part +POSTHOOK: Input: default@tab_part@ds=2008-04-08 + A masked pattern was here +Warning: Shuffle Join JOIN[9][tables = [a, b]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: select * from +(select * from tab where tab.key = 0)a +
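The SemanticAnalyzer change above is deliberately narrow: a new join is merged into the existing join tree only when that tree contains no outer join (leftTree.getNoOuterJoin()), since merging across an outer join can produce wrong results. Note the added mergejoin.q cases run with hive.cbo.enable=false so the legacy merge path is actually exercised. A minimal sketch of the problematic shape, assuming tables a, b, c sharing a key column:

    select *
    from a
    full outer join b on a.key = b.key
    join c on b.key = c.key;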
[36/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/innerjoin.q.out -- diff --git a/ql/src/test/results/clientpositive/innerjoin.q.out b/ql/src/test/results/clientpositive/innerjoin.q.out index 91bb7b3..092065d 100644 --- a/ql/src/test/results/clientpositive/innerjoin.q.out +++ b/ql/src/test/results/clientpositive/innerjoin.q.out @@ -34,15 +34,14 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string), value (type: string) -outputColumnNames: _col0, _col1 +expressions: key (type: string) +outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -50,14 +49,15 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), value (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -65,10 +65,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: UDFToInteger(_col2) (type: int), _col1 (type: string) +expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/join1.q.out -- diff --git a/ql/src/test/results/clientpositive/join1.q.out b/ql/src/test/results/clientpositive/join1.q.out index 9ce2646..761c85e 100644 --- a/ql/src/test/results/clientpositive/join1.q.out +++ b/ql/src/test/results/clientpositive/join1.q.out @@ -34,15 +34,14 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string), value (type: string) -outputColumnNames: _col0, _col1 +expressions: key (type: string) +outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -50,14 +49,15 @@ STAGE PLANS: predicate: key is not null (type: boolean) 
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), value (type: string) +outputColumnNames:
[52/91] [abbrv] hive git commit: HIVE-12436: Default hive.metastore.schema.verification to true (Ashutosh Chauhan via Sushanth Sowmyan)
HIVE-12436 : Default hive.metastore.schema.verification to true (Ashutosh Chauhan via Sushanth Sowmyan) Signed-off-by: Ashutosh Chauhan Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7f4379ab Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7f4379ab Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7f4379ab Branch: refs/heads/spark Commit: 7f4379ab0d9afc8ab305e7d5151984882b95cdcc Parents: 7dab21a Author: Ashutosh Chauhan Authored: Tue Nov 17 10:46:45 2015 -0800 Committer: Ashutosh Chauhan Committed: Fri Nov 20 14:49:56 2015 -0800 -- .../src/test/org/apache/hive/beeline/cli/TestHiveCli.java| 1 + beeline/src/test/resources/hive-site.xml | 5 + common/src/java/org/apache/hadoop/hive/conf/HiveConf.java| 2 +- data/conf/hive-site.xml | 5 + data/conf/llap/hive-site.xml | 5 + data/conf/spark/standalone/hive-site.xml | 5 + data/conf/spark/yarn-client/hive-site.xml| 4 data/conf/tez/hive-site.xml | 5 + .../src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java | 8 ++-- 9 files changed, 33 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/7f4379ab/beeline/src/test/org/apache/hive/beeline/cli/TestHiveCli.java -- diff --git a/beeline/src/test/org/apache/hive/beeline/cli/TestHiveCli.java b/beeline/src/test/org/apache/hive/beeline/cli/TestHiveCli.java index 21ba690..53dfa1d 100644 --- a/beeline/src/test/org/apache/hive/beeline/cli/TestHiveCli.java +++ b/beeline/src/test/org/apache/hive/beeline/cli/TestHiveCli.java @@ -278,6 +278,7 @@ public class TestHiveCli { public void setup() { System.setProperty("datanucleus.fixedDatastore", "false"); System.setProperty("datanucleus.autoCreateSchema", "true"); +System.setProperty("hive.metastore.schema.verification", "false"); cli = new HiveCli(); redirectOutputStream(); initFromFile(); http://git-wip-us.apache.org/repos/asf/hive/blob/7f4379ab/beeline/src/test/resources/hive-site.xml -- diff --git a/beeline/src/test/resources/hive-site.xml b/beeline/src/test/resources/hive-site.xml index b2347c7..615d4ed 100644 --- a/beeline/src/test/resources/hive-site.xml +++ b/beeline/src/test/resources/hive-site.xml @@ -34,6 +34,11 @@ false + + hive.metastore.schema.verification + false + + javax.jdo.option.ConnectionURL jdbc:derby:;databaseName=${test.tmp.dir}/metastore_db;create=true http://git-wip-us.apache.org/repos/asf/hive/blob/7f4379ab/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 09f3c3e..f48403b 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -540,7 +540,7 @@ public class HiveConf extends Configuration { METASTORE_AUTO_CREATE_SCHEMA("datanucleus.autoCreateSchema", false, "creates necessary schema on a startup if one doesn't exist. set this to false, after creating it once"), METASTORE_FIXED_DATASTORE("datanucleus.fixedDatastore", true, "Dictates whether to allow updates to schema or not."), -METASTORE_SCHEMA_VERIFICATION("hive.metastore.schema.verification", false, +METASTORE_SCHEMA_VERIFICATION("hive.metastore.schema.verification", true, "Enforce metastore schema version consistency.\n" + "True: Verify that version information stored in metastore matches with one from Hive jars. Also disable automatic\n" + " schema migration attempt. 
Users are required to manually migrate schema after Hive upgrade which ensures\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/7f4379ab/data/conf/hive-site.xml -- diff --git a/data/conf/hive-site.xml b/data/conf/hive-site.xml index d15cc17..2ebb1c4 100644 --- a/data/conf/hive-site.xml +++ b/data/conf/hive-site.xml @@ -69,6 +69,11 @@ + hive.metastore.schema.verification + false + + + javax.jdo.option.ConnectionURL jdbc:derby:;databaseName=${test.tmp.dir}/junit_metastore_db;create=true http://git-wip-us.apache.org/repos/asf/hive/blob/7f4379ab/data/conf/llap/hive-site.xml -- diff --git a/data/conf/llap/hive-site.xml b/data/conf/llap/hive-site.xml index becb5b2..9e15eda 100644 --- a
[46/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out -- diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out index e3bb51d..5596ef0 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out @@ -170,23 +170,27 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: string) - 1 key (type: string) -Position of Big Table: 1 -BucketMapJoin: true -Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator -sort order: -tag: -1 -value expressions: _col0 (type: bigint) -auto parallelism: false + Select Operator +expressions: key (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE +Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: string) +1 _col0 (type: string) + Position of Big Table: 1 + BucketMapJoin: true + Group By Operator +aggregations: count() +mode: hash +outputColumnNames: _col0 +Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: A masked pattern was here Path -> Partition: @@ -287,8 +291,8 @@ STAGE PLANS: name: default.bucket_big name: default.bucket_big Truncated Path -> Alias: -/bucket_big/ds=2008-04-08 [b] -/bucket_big/ds=2008-04-09 [b] +/bucket_big/ds=2008-04-08 [$hdt$_1:b] +/bucket_big/ds=2008-04-09 [$hdt$_1:b] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -394,23 +398,27 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: string) - 1 key (type: string) -Position of Big Table: 0 -BucketMapJoin: true -Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator -sort order: -tag: -1 -value expressions: _col0 (type: bigint) -auto parallelism: false + Select Operator +expressions: key (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE +Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: string) +1 _col0 (type: string) + Position of Big Table: 0 + BucketMapJoin: true + Group By Operator +aggregations: count() +mode: hash +outputColumnNames: _col0 +Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: A masked pattern was here Path -> Partition: @@ -511,8 +519,8 @@ STAGE PLANS: name: default.bucket_big name: default.bucket_big Truncated Path -> Alias: -/bucket_big/ds=2008-04-08 [a] -/bucket_big/ds=2008-04-09 [a] +/bucket_big/ds=2008-04-08 [$hdt$_0:a] +/bu
[75/91] [abbrv] hive git commit: HIVE-12498: ACID: Setting OrcRecordUpdater.OrcOptions.tableProperties() has no effect (Prasanth Jayachandran reviewed by Eugene Koifman)
HIVE-12498: ACID: Setting OrcRecordUpdater.OrcOptions.tableProperties() has no effect (Prasanth Jayachandran reviewed by Eugene Koifman) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3cb23b9e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3cb23b9e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3cb23b9e Branch: refs/heads/spark Commit: 3cb23b9e4b0a0e2a8edc3c2f5ee040e5ad01142d Parents: 4201179 Author: Prasanth Jayachandran Authored: Wed Nov 25 12:10:02 2015 -0600 Committer: Prasanth Jayachandran Committed: Wed Nov 25 12:10:02 2015 -0600 -- .../hadoop/hive/ql/io/orc/OrcRecordUpdater.java | 3 +- .../hive/ql/io/orc/TestOrcRecordUpdater.java| 58 ++-- 2 files changed, 54 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/3cb23b9e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java index 67c5a11..ee31c23 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java @@ -247,7 +247,8 @@ public class OrcRecordUpdater implements RecordUpdater { writerOptions = ((OrcOptions) options).getOrcOptions(); } if (writerOptions == null) { - writerOptions = OrcFile.writerOptions(options.getConfiguration()); + writerOptions = OrcFile.writerOptions(options.getTableProperties(), + options.getConfiguration()); } writerOptions.fileSystem(fs).callback(indexBuilder); if (!options.isWritingBase()) { http://git-wip-us.apache.org/repos/asf/hive/blob/3cb23b9e/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java index 22030b4..973cc40 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRecordUpdater.java @@ -18,6 +18,15 @@ package org.apache.hadoop.hive.ql.io.orc; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.File; +import java.io.PrintStream; +import java.util.Properties; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -33,12 +42,6 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.Reporter; import org.junit.Test; -import java.io.DataInputStream; -import java.io.File; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; - public class TestOrcRecordUpdater { @Test @@ -180,6 +183,49 @@ public class TestOrcRecordUpdater { } @Test + public void testWriterTblProperties() throws Exception { +Path root = new Path(workDir, "testWriterTblProperties"); +Configuration conf = new Configuration(); +// Must use raw local because the checksummer doesn't honor flushes. 
+FileSystem fs = FileSystem.getLocal(conf).getRaw(); +ObjectInspector inspector; +synchronized (TestOrcFile.class) { + inspector = ObjectInspectorFactory.getReflectionObjectInspector + (MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); +} +Properties tblProps = new Properties(); +tblProps.setProperty("orc.compress", "SNAPPY"); +AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf) +.filesystem(fs) +.bucket(10) +.writingBase(false) +.minimumTransactionId(10) +.maximumTransactionId(19) +.inspector(inspector) +.reporter(Reporter.NULL) +.finalDestination(root) +.tableProperties(tblProps); +RecordUpdater updater = new OrcRecordUpdater(root, options); +updater.insert(11, new MyRow("first")); +updater.insert(11, new MyRow("second")); +updater.insert(11, new MyRow("third")); +updater.flush(); +updater.insert(12, new MyRow("fourth")); +updater.insert(12, new MyRow("fifth")); +updater.flush(); + +PrintStream origOut = System.out; +ByteArrayOutputStream myOut = new ByteArrayOutputStream(); +System.setOut(new PrintStream(myOut)); +FileDump.main(new String[]{root.toUri().toString()}); +System.out.flush(); +String outDump = new String(myOut.toByteArray()); +assertEquals(true, outDump.contains("Compression: SNAPPY"));
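The one-line fix routes options.getTableProperties() into OrcFile.writerOptions(...), so ORC settings declared as table properties (the new test verifies orc.compress=SNAPPY via FileDump output) now reach the writer that produces ACID delta files. From the SQL side those properties originate in the table definition; a hedged sketch of a transactional ORC table whose compression property should now take effect (table and column names are illustrative):

    create table acid_orc (id bigint, msg string)
    clustered by (id) into 2 buckets
    stored as orc
    tblproperties ('transactional'='true', 'orc.compress'='SNAPPY');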
[74/91] [abbrv] hive git commit: HIVE-12520: Fix schema_evol* tests on master (Ashutosh Chauhan via Prasanth J)
HIVE-12520 : Fix schema_evol* tests on master (Ashutosh Chauhan via Prasanth J) Signed-off-by: Ashutosh Chauhan Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4201179c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4201179c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4201179c Branch: refs/heads/spark Commit: 4201179cfafbcc71acc6c057087957d6cffd1021 Parents: 5a5e249 Author: Ashutosh Chauhan Authored: Wed Nov 25 09:40:38 2015 -0800 Committer: Ashutosh Chauhan Committed: Wed Nov 25 10:04:15 2015 -0800 -- hbase-handler/src/test/results/positive/hbase_queries.q.out | 1 + ql/src/test/queries/clientpositive/insert_values_nonascii.q | 2 +- .../clientpositive/schema_evol_orc_acid_mapwork_part.q| 2 +- .../clientpositive/schema_evol_orc_acid_mapwork_table.q | 2 +- .../clientpositive/schema_evol_orc_acidvec_mapwork_part.q | 2 +- .../clientpositive/schema_evol_orc_acidvec_mapwork_table.q| 2 +- .../clientpositive/schema_evol_orc_nonvec_fetchwork_table.q | 3 +-- .../clientpositive/schema_evol_orc_nonvec_mapwork_table.q | 2 +- .../clientpositive/schema_evol_orc_vec_mapwork_table.q| 2 +- .../queries/clientpositive/schema_evol_text_fetchwork_table.q | 2 +- .../queries/clientpositive/schema_evol_text_mapwork_table.q | 2 +- ql/src/test/results/clientpositive/limit_join_transpose.q.out | 7 +++ .../results/clientpositive/tez/tez_dynpart_hashjoin_3.q.out | 2 ++ 13 files changed, 20 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/4201179c/hbase-handler/src/test/results/positive/hbase_queries.q.out -- diff --git a/hbase-handler/src/test/results/positive/hbase_queries.q.out b/hbase-handler/src/test/results/positive/hbase_queries.q.out index fa348e3..1ab9877 100644 --- a/hbase-handler/src/test/results/positive/hbase_queries.q.out +++ b/hbase-handler/src/test/results/positive/hbase_queries.q.out @@ -176,6 +176,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/4201179c/ql/src/test/queries/clientpositive/insert_values_nonascii.q -- diff --git a/ql/src/test/queries/clientpositive/insert_values_nonascii.q b/ql/src/test/queries/clientpositive/insert_values_nonascii.q index 2e4ef41..52b34e9 100644 --- a/ql/src/test/queries/clientpositive/insert_values_nonascii.q +++ b/ql/src/test/queries/clientpositive/insert_values_nonascii.q @@ -1,6 +1,6 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set hive.enforce.bucketing=true; + create table insert_values_nonascii(t1 char(32), t2 string); http://git-wip-us.apache.org/repos/asf/hive/blob/4201179c/ql/src/test/queries/clientpositive/schema_evol_orc_acid_mapwork_part.q -- diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_mapwork_part.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_mapwork_part.q index 681a4ac..d6919c1 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_mapwork_part.q +++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_mapwork_part.q @@ -1,7 +1,7 @@ set hive.cli.print.header=true; set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set 
hive.enforce.bucketing=true; + SET hive.vectorized.execution.enabled=false; set hive.fetch.task.conversion=none; set hive.exec.dynamic.partition.mode=nonstrict; http://git-wip-us.apache.org/repos/asf/hive/blob/4201179c/ql/src/test/queries/clientpositive/schema_evol_orc_acid_mapwork_table.q -- diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_mapwork_table.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_mapwork_table.q index bde5d50..8c933e1 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_mapwork_table.q +++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_mapwork_table.q @@ -1,7 +1,7 @@ set hive.cli.print.header=true; set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set hive.enforce.bucketing=true; + SET hive.vectorized.execution.enabled=false; set
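All of these q-files lose the same line, set hive.enforce.bucketing=true;, which is no longer needed as a toggle now that bucketing is always enforced on master. The ACID test preamble that remains is just the concurrency and transaction-manager settings, exactly as in the diffs above:

    set hive.support.concurrency=true;
    set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;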
[28/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out index cec4219..bf59bfe 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out @@ -166,67 +166,72 @@ STAGE PLANS: alias: agg filterExpr: dim_shops_id is not null (type: boolean) Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE - Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 dim_shops_id (type: int) - 1 id (type: int) -outputColumnNames: _col0, _col1, _col5, _col6 -input vertices: - 1 Map 4 -Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE -HybridGraceHashJoin: true -Filter Operator - predicate: (_col1 = _col5) (type: boolean) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: amount (type: decimal(10,0)), dim_shops_id (type: int) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE +Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col1 (type: int) +1 _col0 (type: int) + outputColumnNames: _col0, _col3 + input vertices: +1 Map 4 + Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Select Operator -expressions: _col6 (type: string), _col0 (type: decimal(10,0)) -outputColumnNames: _col6, _col0 -Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE +expressions: _col3 (type: string), _col0 (type: decimal(10,0)) +outputColumnNames: _col3, _col0 +Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), sum(_col0) - keys: _col6 (type: string) + keys: _col3 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) -Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) Execution mode: llap Map 4 Map Operator Tree: TableScan alias: d1 - filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) + filterExpr: ((label) IN ('foo', 'bar') and id is not null) (type: boolean) Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator -predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) +predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean) Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: id (type: int) - sort order: + - Map-reduce partition columns: id (type: int) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats:
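This chunk shows dynamic partition pruning surviving the CBO switch: the dimension filter (label) IN ('foo', 'bar') still reaches Map 4 as a filterExpr, only with the conjuncts reordered and the join keys rewritten to _col aliases. Reconstructed from the operators in the plan (table and column names follow the aliases shown, so treat this as an approximation of the underlying test query rather than its exact text):

    select d.label, count(*), sum(a.amount)
    from agg a
    join dim_shops d on (a.dim_shops_id = d.id)
    where d.label in ('foo', 'bar')
    group by d.label;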
[06/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out b/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out index c228f0b..131c075 100644 --- a/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out +++ b/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out @@ -94,7 +94,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[10][tables = [d1, d2, a]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product PREHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A PREHOOK: type: QUERY POSTHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A @@ -119,49 +119,61 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) +Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE +value expressions: _col1 (type: string) Map 4 Map Operator Tree: TableScan - alias: d2 + alias: d1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) +Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE +value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -sort order: + Select Operator +expressions: key (type: string), value (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -value expressions: key (type: string), value (type: string) +Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 key (type: string) - 1 key (type: string) -outputColumnNames: _col0, _col1, _col5, _col6 + 0 _col0 (type: string) + 1 _col0 (type: string) +outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Col
[21/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/multiMapJoin1.q.out -- diff --git a/ql/src/test/results/clientpositive/multiMapJoin1.q.out b/ql/src/test/results/clientpositive/multiMapJoin1.q.out index 8548fc7..f0d86f2 100644 --- a/ql/src/test/results/clientpositive/multiMapJoin1.q.out +++ b/ql/src/test/results/clientpositive/multiMapJoin1.q.out @@ -189,35 +189,43 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: -firstjoin:smalltbl1 +$hdt$_0:$hdt$_1:smalltbl1 Fetch Operator limit: -1 -smalltbl2 +$hdt$_1:smalltbl2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: -firstjoin:smalltbl1 +$hdt$_0:$hdt$_1:smalltbl1 TableScan alias: smalltbl1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator -keys: - 0 key (type: string) - 1 key (type: string) -smalltbl2 + Select Operator +expressions: key (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE +HashTable Sink Operator + keys: +0 _col0 (type: string) +1 _col0 (type: string) +$hdt$_1:smalltbl2 TableScan alias: smalltbl2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator -keys: - 0 _col1 (type: string) - 1 value (type: string) + Select Operator +expressions: value (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE +HashTable Sink Operator + keys: +0 _col0 (type: string) +1 _col0 (type: string) Stage: Stage-3 Map Reduce @@ -228,30 +236,38 @@ STAGE PLANS: Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE - Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: string) - 1 key (type: string) -outputColumnNames: _col1 -Statistics: Num rows: 1375 Data size: 14608 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: key (type: string), value (type: string) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: -0 _col1 (type: string) -1 value (type: string) - Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE - Group By Operator -aggregations: count() -mode: hash +0 _col0 (type: string) +1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 1375 Data size: 14608 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: _col1 (type: string) outputColumnNames: _col0 -Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) +Statistics: Num rows: 1375 Data size: 14608 Basic stats: COMPLETE Column stats: NONE +Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: string) +
[62/91] [abbrv] hive git commit: HIVE-12175: Upgrade Kryo version to 3.0.x (Prasanth Jayachandran reviewed by Ashutosh Chauhan)
HIVE-12175: Upgrade Kryo version to 3.0.x (Prasanth Jayachandran reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b7281ce6 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b7281ce6 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b7281ce6 Branch: refs/heads/spark Commit: b7281ce6a61bcfbd398fd691cddc38c4f1a61f64 Parents: c6a835c Author: Prasanth Jayachandran Authored: Tue Nov 24 12:43:46 2015 -0600 Committer: Prasanth Jayachandran Committed: Tue Nov 24 12:43:46 2015 -0600 -- itests/qtest-accumulo/pom.xml | 2 +- pom.xml | 6 +- ql/pom.xml | 36 +++-- .../apache/hadoop/hive/ql/exec/Utilities.java | 145 +-- .../org/apache/hadoop/hive/ql/plan/MapWork.java | 15 -- .../apache/hadoop/hive/ql/plan/ReduceWork.java | 5 - spark-client/pom.xml| 28 ++-- .../hive/spark/client/rpc/KryoMessageCodec.java | 11 +- 8 files changed, 185 insertions(+), 63 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/b7281ce6/itests/qtest-accumulo/pom.xml -- diff --git a/itests/qtest-accumulo/pom.xml b/itests/qtest-accumulo/pom.xml index 7403a15..f7325dc 100644 --- a/itests/qtest-accumulo/pom.xml +++ b/itests/qtest-accumulo/pom.xml @@ -123,7 +123,7 @@ - com.esotericsoftware.kryo + com.esotericsoftware kryo ${kryo.version} test http://git-wip-us.apache.org/repos/asf/hive/blob/b7281ce6/pom.xml -- diff --git a/pom.xml b/pom.xml index c6df4a5..c38c10f 100644 --- a/pom.xml +++ b/pom.xml @@ -144,7 +144,7 @@ 3.5.2 20090211 4.11 -2.22 +3.0.3 0.9.3 0.9.3 2.4 @@ -228,8 +228,8 @@ -com.esotericsoftware.kryo -kryo +com.esotericsoftware +kryo-shaded ${kryo.version} http://git-wip-us.apache.org/repos/asf/hive/blob/b7281ce6/ql/pom.xml -- diff --git a/ql/pom.xml b/ql/pom.xml index 9420a62..d893099 100644 --- a/ql/pom.xml +++ b/ql/pom.xml @@ -72,8 +72,8 @@ - com.esotericsoftware.kryo - kryo + com.esotericsoftware + kryo-shaded ${kryo.version} @@ -594,16 +594,20 @@ spark-core_${scala.binary.version} ${spark.version} true - - -org.slf4j -slf4j-log4j12 - - -commmons-logging -commons-logging - - + + + com.esotericsoftware.kryo + kryo + + + org.slf4j + slf4j-log4j12 + + + commmons-logging + commons-logging + + com.sun.jersey @@ -746,7 +750,9 @@ org.apache.hive:hive-serde org.apache.hive:hive-llap-client org.apache.hive:hive-metastore - com.esotericsoftware.kryo:kryo + com.esotericsoftware:kryo-shaded + com.esotericsoftware:minlog + org.objenesis:objenesis org.apache.parquet:parquet-hadoop-bundle org.apache.thrift:libthrift org.apache.thrift:libfb303 @@ -779,6 +785,10 @@ com.esotericsoftware org.apache.hive.com.esotericsoftware + + org.objenesis + org.apache.hive.org.objenesis + http://git-wip-us.apache.org/repos/asf/hive/blob/b7281ce6/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 9dbb45a..8b8cf6d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -37,6 +37,8 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.Serializable; import java.io.UnsupportedEncodingException; +import java.lang.reflect.Array; +import java.lang.reflect.Field; import java.net.URI; import java.net.URL; import java.net.URLClassLoader; @@ -87,8 +89,6 @@ import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.WordUtils; import org.apache.commons.lang3.StringEscapeUtils; import 
org.apache.commons.lang3.tuple.Pair; -import org.sl
[30/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out b/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out index 72a5d0d..a1addb7 100644 --- a/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out +++ b/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out @@ -38,76 +38,79 @@ union all select 2 as id from tb2 limit 1) b on a.id=b.id POSTHOOK: type: QUERY -Plan not optimized by CBO due to missing statistics. Please check log for more details. +Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Union 2 (CONTAINS) -Map 5 <- Union 2 (CONTAINS) -Reducer 3 <- Union 2 (SIMPLE_EDGE) -Reducer 4 <- Map 6 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Map 3 <- Union 4 (CONTAINS) +Map 6 <- Union 4 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 5 <- Union 4 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 llap - File Output Operator [FS_16] + Reducer 2 llap + File Output Operator [FS_17] compressed:false Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} -Merge Join Operator [MERGEJOIN_20] +Merge Join Operator [MERGEJOIN_21] | condition map:[{"":"Left Outer Join0 to 1"}] -| keys:{"0":"id (type: int)","1":"_col0 (type: int)"} +| keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"} | outputColumnNames:["_col0"] | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE -|<-Map 6 [SIMPLE_EDGE] llap -| Reduce Output Operator [RS_12] -| key expressions:id (type: int) -| Map-reduce partition columns:id (type: int) +|<-Map 1 [SIMPLE_EDGE] llap +| Reduce Output Operator [RS_13] +| key expressions:_col0 (type: int) +| Map-reduce partition columns:_col0 (type: int) | sort order:+ | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE -| TableScan [TS_11] -|alias:a +| Select Operator [SEL_1] +|outputColumnNames:["_col0"] |Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE -|<-Reducer 3 [SIMPLE_EDGE] llap - Reduce Output Operator [RS_13] +|TableScan [TS_0] +| alias:a +| Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +|<-Reducer 5 [SIMPLE_EDGE] llap + Reduce Output Operator [RS_14] key expressions:_col0 (type: int) Map-reduce partition columns:_col0 (type: int) sort order:+ Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Limit [LIM_10] + Limit [LIM_11] Number of rows:1 Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator [SEL_9] + Select Operator [SEL_10] | outputColumnNames:["_col0"] | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - |<-Union 2 [SIMPLE_EDGE] -|<-Map 1 [CONTAINS] llap -| Reduce Output Operator [RS_8] + |<-Union 4 [SIMPLE_EDGE] +|<-Map 3 [CONTAINS] llap +| Reduce Output Operator [RS_9] | sort order: | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE | value expressions:_col0 (type: int) -| Limit [LIM_7] +| Limit [LIM_8] |Number of rows:1 |Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE -|Select Operator [SEL_1] +|Select Operator [SEL_3] | outputColumnNames:["_col0"] | Statistics:Num rows: 1 
Data size: 0 Basic stats: PARTIAL Column stats: NONE -| Tabl
[40/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/correlationoptimizer13.q.out -- diff --git a/ql/src/test/results/clientpositive/correlationoptimizer13.q.out b/ql/src/test/results/clientpositive/correlationoptimizer13.q.out index d652d87..8771f1c 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer13.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer13.q.out @@ -65,11 +65,11 @@ STAGE PLANS: Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c3 (type: string), c1 (type: int) -outputColumnNames: c3, c1 +outputColumnNames: _col0, _col1 Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: c3 (type: string), c1 (type: int) + keys: _col0 (type: string), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE @@ -112,7 +112,7 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 312 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: Join Operator @@ -159,26 +159,26 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan -alias: x1 +alias: x Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((c2 > 100) and c1 is not null) and c3 is not null) (type: boolean) - Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE + predicate: c2 > 100) and (c1 < 120)) and c1 is not null) and c3 is not null) (type: boolean) + Statistics: Num rows: 29 Data size: 647 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c3 (type: string), c1 (type: int) -outputColumnNames: c3, c1 -Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE +outputColumnNames: _col0, _col1 +Statistics: Num rows: 29 Data size: 647 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: c3 (type: string), c1 (type: int) + keys: _col0 (type: string), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 647 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) -Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 29 Data size: 647 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -186,11 +186,11 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 -Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE Column stats: NONE 
+Statistics: Num rows: 14 Data size: 312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/correlationoptimizer4.q.out -- diff --git a/ql/src/test/re
[69/91] [abbrv] hive git commit: HIVE-12329: Turn on limit pushdown optimization by default (Ashutosh Chauhan via Prasanth J)
http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/smb_mapjoin_15.q.out -- diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_15.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_15.q.out index 64a3ea2..8608187 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_15.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_15.q.out @@ -124,6 +124,8 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + tag: -1 +TopN: 10 +TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) auto parallelism: false Path -> Alias: @@ -382,6 +384,8 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + tag: -1 +TopN: 10 +TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) auto parallelism: false Path -> Alias: @@ -588,6 +592,8 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + tag: -1 +TopN: 10 +TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) auto parallelism: false Path -> Alias: @@ -827,6 +833,8 @@ STAGE PLANS: sort order: + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE tag: -1 +TopN: 10 +TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) auto parallelism: false Local Work: http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out index a234ff5..932fdcc 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out @@ -65,6 +65,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Reducer 3 Reduce Operator Tree: Select Operator @@ -238,6 +239,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Reducer 4 Reduce Operator Tree: Select Operator @@ -411,6 +413,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Reducer 4 Reduce Operator Tree: Select Operator http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out index a99cb74..84f68a3 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out @@ -262,6 +262,8 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE tag: -1 + TopN: 1 + TopN Hash Memory Usage: 0.1 auto parallelism: false Local Work: Map Reduce Local Work 
http://git-wip-us.apache.org/repos/asf/hive/blob/daf46aa7/ql/src/test/results/clientpositive/spark/ctas.q.out ---
[59/91] [abbrv] hive git commit: HIVE-12411: Remove counter based stats collection mechanism (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/ab98ffc2/ql/src/test/results/clientpositive/stats_counter_partitioned.q.out -- diff --git a/ql/src/test/results/clientpositive/stats_counter_partitioned.q.out b/ql/src/test/results/clientpositive/stats_counter_partitioned.q.out deleted file mode 100644 index 626dcff..000 --- a/ql/src/test/results/clientpositive/stats_counter_partitioned.q.out +++ /dev/null @@ -1,465 +0,0 @@ -PREHOOK: query: -- partitioned table analyze - -create table dummy (key string, value string) partitioned by (ds string, hr string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dummy -POSTHOOK: query: -- partitioned table analyze - -create table dummy (key string, value string) partitioned by (ds string, hr string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dummy -PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12') -PREHOOK: type: LOAD - A masked pattern was here -PREHOOK: Output: default@dummy -POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12') -POSTHOOK: type: LOAD - A masked pattern was here -POSTHOOK: Output: default@dummy -POSTHOOK: Output: default@dummy@ds=2008/hr=12 -PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11') -PREHOOK: type: LOAD - A masked pattern was here -PREHOOK: Output: default@dummy -POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11') -POSTHOOK: type: LOAD - A masked pattern was here -POSTHOOK: Output: default@dummy -POSTHOOK: Output: default@dummy@ds=2008/hr=11 -PREHOOK: query: analyze table dummy partition (ds,hr) compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@dummy -PREHOOK: Input: default@dummy@ds=2008/hr=11 -PREHOOK: Input: default@dummy@ds=2008/hr=12 -PREHOOK: Output: default@dummy -PREHOOK: Output: default@dummy@ds=2008/hr=11 -PREHOOK: Output: default@dummy@ds=2008/hr=12 -POSTHOOK: query: analyze table dummy partition (ds,hr) compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dummy -POSTHOOK: Input: default@dummy@ds=2008/hr=11 -POSTHOOK: Input: default@dummy@ds=2008/hr=12 -POSTHOOK: Output: default@dummy -POSTHOOK: Output: default@dummy@ds=2008/hr=11 -POSTHOOK: Output: default@dummy@ds=2008/hr=12 -PREHOOK: query: describe formatted dummy partition (ds='2008', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@dummy -POSTHOOK: query: describe formatted dummy partition (ds='2008', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@dummy -# col_name data_type comment - -keystring -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008, 11] -Database: default -Table: dummy - A masked pattern was here -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles1 - numRows 500 - rawDataSize 5312 - totalSize 5812 - A masked pattern was here - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed:No -Num Buckets: -1 -Bucket Columns:[] -Sort Columns: [] -Storage Desc Params: - serialization.format1 -PREHOOK: query: describe formatted dummy partition (ds='2008', hr='12') -PREHOOK: type: 
DESCTABLE -PREHOOK: Input: default@dummy -POSTHOOK: query: describe formatted dummy partition (ds='2008', hr='12') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@dummy -# col_name data_type comment - -key string -value string
[09/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out index fed923c..4133fda 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out @@ -173,11 +173,15 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE -Spark HashTable Sink Operator - keys: -0 key (type: int) -1 key (type: int) - Position of Big Table: 1 +Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator +keys: + 0 _col0 (type: int) + 1 _col0 (type: int) +Position of Big Table: 1 Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -252,48 +256,52 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE -Map Join Operator - condition map: - Inner Join 0 to 1 - keys: -0 key (type: int) -1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: -0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator -expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) -outputColumnNames: _col0, _col1, _col2 +Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Map Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 _col0 (type: int) + 1 _col0 (type: int) +outputColumnNames: _col0, _col1, _col3 +input vertices: + 0 Map 1 +Position of Big Table: 1 Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false - GlobalTableId: 1 - A masked pattern was here - NumFilesPerFileSink: 1 +BucketMapJoin: true +Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - A masked pattern was here - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: -bucket_count -1 -columns key,value1,value2 -columns.comments -columns.types string:string:string - A masked pattern was here -name default.bucketmapjoin_tmp_result -serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} -serialization.format 1 -serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - A masked pattern was here -
[04/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out index 5a77830..2eb0c3b 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out @@ -79,12 +79,16 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: p_name (type: string) - sort order: + - Map-reduce partition columns: p_name (type: string) +Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reduce Output Operator +key expressions: _col1 (type: string) +sort order: + +Map-reduce partition columns: _col1 (type: string) +Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE +value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 3 Map Operator Tree: TableScan @@ -93,12 +97,16 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE -Reduce Output Operator - key expressions: p2_name (type: string) - sort order: + - Map-reduce partition columns: p2_name (type: string) +Select Operator + expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Reduce Output Operator +key expressions: _col1 (type: string) +sort order: + +Map-reduce partition columns: _col1 (type: string) +Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 4 Map Operator Tree: TableScan @@ -107,12 +115,16 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE -Reduce Output Operator - key expressions: p3_name (type: string) - sort order: + - Map-reduce partition columns: 
p3_name (type: string) +Select Operator + expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) +
[18/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/skewjoin.q.out -- diff --git a/ql/src/test/results/clientpositive/skewjoin.q.out b/ql/src/test/results/clientpositive/skewjoin.q.out index 22a9421..13c4470 100644 --- a/ql/src/test/results/clientpositive/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/skewjoin.q.out @@ -101,15 +101,14 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string), value (type: string) -outputColumnNames: _col0, _col1 +expressions: key (type: string) +outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -117,14 +116,15 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), value (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -133,10 +133,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: UDFToInteger(_col2) (type: int), _col1 (type: string) +expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -175,9 +175,9 @@ STAGE PLANS: keys: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col2 Select Operator -expressions: UDFToInteger(_col2) (type: int), _col1 (type: string) +expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -595,15 +595,14 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string), value (type: string) -outputColumnNames: _col0, _col1 +expressions: key (type: string) +outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic 
stats: COMPLETE Column stats: NONE @@ -611,14 +610,15 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
[01/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Repository: hive Updated Branches: refs/heads/spark eddb8ca1d -> 79035f1c5 http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/spark/join_star.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/join_star.q.out b/ql/src/test/results/clientpositive/spark/join_star.q.out index 69c2fd7..487e0c9 100644 --- a/ql/src/test/results/clientpositive/spark/join_star.q.out +++ b/ql/src/test/results/clientpositive/spark/join_star.q.out @@ -148,10 +148,14 @@ STAGE PLANS: Filter Operator predicate: f1 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE -Spark HashTable Sink Operator - keys: -0 d1 (type: int) -1 f1 (type: int) +Select Operator + expressions: f1 (type: int), f2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator +keys: + 0 _col2 (type: int) + 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -167,27 +171,31 @@ STAGE PLANS: Filter Operator predicate: d1 is not null (type: boolean) Statistics: Num rows: 4 Data size: 49 Basic stats: COMPLETE Column stats: NONE -Map Join Operator - condition map: - Inner Join 0 to 1 - keys: -0 d1 (type: int) -1 f1 (type: int) - outputColumnNames: _col0, _col1, _col8 - input vertices: -1 Map 2 - Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Select Operator -expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int) -outputColumnNames: _col0, _col1, _col2 +Select Operator + expressions: m1 (type: int), m2 (type: int), d1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Map Join Operator +condition map: + Inner Join 0 to 1 +keys: + 0 _col2 (type: int) + 1 _col0 (type: int) +outputColumnNames: _col0, _col1, _col4 +input vertices: + 1 Map 2 Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false +Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator +compressed: false +Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE +table: +input format: org.apache.hadoop.mapred.TextInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -239,10 +247,14 @@ STAGE PLANS: Filter Operator predicate: f1 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE -Spark HashTable Sink Operator - keys: -0 d1 (type: int) -1 f1 (type: int) +Select Operator + expressions: f1 (type: int), f2 (type: int) + outputColumnNames: _co
[14/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/skewjoinopt4.q.out -- diff --git a/ql/src/test/results/clientpositive/skewjoinopt4.q.out b/ql/src/test/results/clientpositive/skewjoinopt4.q.out index 1d2a5a4..28fb7df 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt4.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt4.q.out @@ -62,43 +62,47 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: string) -sort order: + -Map-reduce partition columns: key (type: string) + Select Operator +expressions: key (type: string), val (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE -value expressions: val (type: string) +Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: b Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: string) -sort order: + -Map-reduce partition columns: key (type: string) + Select Operator +expressions: key (type: string), val (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE -value expressions: val (type: string) +Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: -0 key (type: string) -1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 +0 _col0 (type: string) +1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Select Operator -expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) -outputColumnNames: _col0, _col1, _col2, _col3 -Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator +compressed: false +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -133,40 +137,44 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: string) -sort order: + -Map-reduce partition columns: key (type: string) + Select Operator +expressions: key (type: string), val (type: 
string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE -value expressions: val (type: string) +Reduce Output Operator + ke
[81/91] [abbrv] hive git commit: HIVE-12503 : GBY-Join transpose rule may go in infinite loop (Ashutosh Chauhan via Jesus Camacho Rodriguez)
HIVE-12503 : GBY-Join transpose rule may go in infinite loop (Ashutosh Chauhan via Jesus Camacho Rodriguez) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a9d3b096 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a9d3b096 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a9d3b096 Branch: refs/heads/spark Commit: a9d3b096b216677ec59ab68f749de5077e0d4e51 Parents: 9a1f769 Author: Ashutosh Chauhan Authored: Thu Nov 26 11:39:50 2015 -0800 Committer: Ashutosh Chauhan Committed: Thu Nov 26 11:40:55 2015 -0800 -- .../rules/HiveAggregateJoinTransposeRule.java | 17 ++- .../queries/clientpositive/cbo_rp_auto_join1.q | 2 +- .../clientpositive/cbo_rp_auto_join1.q.out | 125 +- .../clientpositive/groupby_join_pushdown.q.out | 128 +-- 4 files changed, 138 insertions(+), 134 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/a9d3b096/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java index c59af39..8cbaed0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java @@ -17,6 +17,7 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; import org.apache.calcite.linq4j.Ord; +import org.apache.calcite.plan.RelOptCost; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; @@ -295,15 +296,13 @@ public class HiveAggregateJoinTransposeRule extends AggregateJoinTransposeRule { Mappings.apply(mapping, aggregate.getGroupSet()), Mappings.apply2(mapping, aggregate.getGroupSets()), newAggCalls); } -call.transformTo(r); -// Add original tree as well for potential alternative transformation. -// This is modeled after LoptOptimizeJoinRule::findBestOrderings() in -// which rule adds multiple transformations and Planner picks the cheapest one. -// Hep planner will automatically pick the one with lower cost among two. -// For details, see: HepPlanner:applyTransformationResults() -// In this case, if ndv is close to # of rows, i.e., group by is not resulting -// in any deduction, doing this transformation is not useful. 
-call.transformTo(aggregate); + +// Make a cost based decision to pick cheaper plan +RelOptCost afterCost = RelMetadataQuery.getCumulativeCost(r); +RelOptCost beforeCost = RelMetadataQuery.getCumulativeCost(aggregate); +if (afterCost.isLt(beforeCost)) { + call.transformTo(r); +} } /** Computes the closure of a set of columns according to a given list of http://git-wip-us.apache.org/repos/asf/hive/blob/a9d3b096/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q -- diff --git a/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q b/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q index b906db2..cbfb5d5 100644 --- a/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q +++ b/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q @@ -3,7 +3,7 @@ set hive.stats.fetch.column.stats=true; ; set hive.exec.reducers.max = 1; - +set hive.transpose.aggr.join=true; -- SORT_QUERY_RESULTS CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; http://git-wip-us.apache.org/repos/asf/hive/blob/a9d3b096/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out -- diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out index 6537a8a..59a2f12 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out @@ -933,8 +933,10 @@ select count(*) from POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -947,41 +949,67 @@ STAGE PLANS: predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: CO
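The interesting part of the HIVE-12503 patch above is the Java hunk: instead of registering both the transposed tree and the original aggregate and letting HepPlanner keep whichever is cheaper, the rule now compares cumulative costs itself and emits at most one transformation, which is what stops the planner from cycling between the two equivalent shapes. A minimal sketch of that guard pattern, assuming the static RelMetadataQuery.getCumulativeCost API that this era of Calcite exposes (the class and method names below, other than those appearing in the diff, are illustrative):

    import org.apache.calcite.plan.RelOptCost;
    import org.apache.calcite.plan.RelOptRuleCall;
    import org.apache.calcite.rel.RelNode;
    import org.apache.calcite.rel.metadata.RelMetadataQuery;

    final class CheaperPlanGuard {
      /**
       * Register the rewritten tree only when metadata says it is cheaper.
       * Emitting a single alternative keeps the Hep planner from bouncing
       * between two equivalent plan shapes indefinitely.
       */
      static void transformIfCheaper(RelOptRuleCall call, RelNode original, RelNode rewritten) {
        RelOptCost after = RelMetadataQuery.getCumulativeCost(rewritten);
        RelOptCost before = RelMetadataQuery.getCumulativeCost(original);
        if (after != null && before != null && after.isLt(before)) {
          call.transformTo(rewritten);
        }
        // Otherwise keep the original plan; no transformation is registered.
      }
    }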
[48/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out -- diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out index 013bc07..17212b8 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out @@ -153,23 +153,27 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: string) - 1 key (type: string) -Position of Big Table: 1 -BucketMapJoin: true -Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator -sort order: -tag: -1 -value expressions: _col0 (type: bigint) -auto parallelism: false + Select Operator +expressions: key (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE +Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: string) +1 _col0 (type: string) + Position of Big Table: 1 + BucketMapJoin: true + Group By Operator +aggregations: count() +mode: hash +outputColumnNames: _col0 +Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: A masked pattern was here Path -> Partition: @@ -222,7 +226,7 @@ STAGE PLANS: name: default.bucket_big name: default.bucket_big Truncated Path -> Alias: -/bucket_big/ds=2008-04-08 [b] +/bucket_big/ds=2008-04-08 [$hdt$_1:b] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -326,23 +330,27 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 key (type: string) - 1 key (type: string) -Position of Big Table: 0 -BucketMapJoin: true -Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator -sort order: -tag: -1 -value expressions: _col0 (type: bigint) -auto parallelism: false + Select Operator +expressions: key (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE +Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col0 (type: string) +1 _col0 (type: string) + Position of Big Table: 0 + BucketMapJoin: true + Group By Operator +aggregations: count() +mode: hash +outputColumnNames: _col0 +Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: A masked pattern was here Path -> Partition: @@ -395,7 +403,7 @@ STAGE PLANS: name: default.bucket_big name: default.bucket_big Truncated Path -> Alias: -/bucket_big/ds=2008-04-08 [a] +/bucket_big/ds=2008-04-08 [$hdt$_0:a] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -499,7 +507,7 @@ STAGE PLANS: Stage: Stage-6
[79/91] [abbrv] hive git commit: HIVE-12476: Metastore NPE on Oracle with Direct SQL (Jason Dere, reviewed by Sushanth Sowmyan)
HIVE-12476: Metastore NPE on Oracle with Direct SQL (Jason Dere, reviewed by Sushanth Sowmyan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/31cc894d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/31cc894d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/31cc894d Branch: refs/heads/spark Commit: 31cc894dfc53df23e9ef981942cec4317967d00b Parents: 0ae374a Author: Jason Dere Authored: Wed Nov 25 15:20:51 2015 -0800 Committer: Jason Dere Committed: Wed Nov 25 15:20:51 2015 -0800 -- .../hadoop/hive/metastore/MetaStoreDirectSql.java | 12 1 file changed, 12 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/31cc894d/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java -- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index 08153ca..d76e77f 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -646,6 +646,10 @@ class MetaStoreDirectSql { public void apply(Partition t, Object[] fields) { t.putToParameters((String)fields[1], (String)fields[2]); }}); +// Perform conversion of null map values +for (Partition t : partitions.values()) { + t.setParameters(MetaStoreUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings)); +} queryText = "select \"PART_ID\", \"PART_KEY_VAL\" from \"PARTITION_KEY_VALS\"" + " where \"PART_ID\" in (" + partIds + ") and \"INTEGER_IDX\" >= 0" @@ -673,6 +677,10 @@ class MetaStoreDirectSql { public void apply(StorageDescriptor t, Object[] fields) { t.putToParameters((String)fields[1], (String)fields[2]); }}); +// Perform conversion of null map values +for (StorageDescriptor t : sds.values()) { + t.setParameters(MetaStoreUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings)); +} queryText = "select \"SD_ID\", \"COLUMN_NAME\", \"SORT_COLS\".\"ORDER\" from \"SORT_COLS\"" + " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0" @@ -810,6 +818,10 @@ class MetaStoreDirectSql { public void apply(SerDeInfo t, Object[] fields) { t.putToParameters((String)fields[1], (String)fields[2]); }}); +// Perform conversion of null map values +for (SerDeInfo t : serdes.values()) { + t.setParameters(MetaStoreUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings)); +} return orderedResult; }
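For context on the three hunks above: Oracle stores empty strings as NULL, so parameter maps read back through the direct-SQL path can contain null values that later trigger NPEs (for example during Thrift serialization), and the fix normalizes each map right after it is populated. A rough sketch of what a trimMapNulls-style helper does; this is a hypothetical stand-in to show the idea, and the real MetaStoreUtils method may differ in detail:

    import java.util.LinkedHashMap;
    import java.util.Map;

    final class NullMapValues {
      /**
       * Hypothetical stand-in for MetaStoreUtils.trimMapNulls: rebuild the map
       * so no value is null, either restoring "" (the value Oracle turned into
       * NULL) or dropping the entry entirely.
       */
      static Map<String, String> trimMapNulls(Map<String, String> in,
                                              boolean nullsToEmptyStrings) {
        if (in == null) {
          return null;
        }
        Map<String, String> out = new LinkedHashMap<>();
        for (Map.Entry<String, String> e : in.entrySet()) {
          if (e.getValue() != null) {
            out.put(e.getKey(), e.getValue());   // ordinary entry, keep as-is
          } else if (nullsToEmptyStrings) {
            out.put(e.getKey(), "");             // NULL came from an empty string
          }                                      // else: drop the null-valued entry
        }
        return out;
      }
    }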
[73/91] [abbrv] hive git commit: HIVE-12469 : Bump Commons-Collections dependency from 3.2.1 to 3.2.2. to address vulnerability (Ashutosh Chauhan via Sergio Pena, Reuben Kuhnert)
HIVE-12469 : Bump Commons-Collections dependency from 3.2.1 to 3.2.2. to address vulnerability (Ashutosh Chauhan via Sergio Pena, Reuben Kuhnert) Signed-off-by: Ashutosh Chauhan Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5a5e2490 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5a5e2490 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5a5e2490 Branch: refs/heads/spark Commit: 5a5e249040386f21ca0cc1eebdaca50180989d76 Parents: f180379 Author: Ashutosh Chauhan Authored: Thu Nov 19 11:06:37 2015 -0800 Committer: Ashutosh Chauhan Committed: Wed Nov 25 09:49:19 2015 -0800 -- accumulo-handler/pom.xml | 8 +++- ant/pom.xml | 6 ++ common/pom.xml | 6 +- hplsql/pom.xml | 5 - pom.xml | 8 +++- 5 files changed, 25 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/5a5e2490/accumulo-handler/pom.xml -- diff --git a/accumulo-handler/pom.xml b/accumulo-handler/pom.xml index 759c718..9d5185b 100644 --- a/accumulo-handler/pom.xml +++ b/accumulo-handler/pom.xml @@ -39,7 +39,13 @@ org.apache.accumulo accumulo-core - + + +commons-collections +commons-collections + + + org.apache.accumulo accumulo-fate http://git-wip-us.apache.org/repos/asf/hive/blob/5a5e2490/ant/pom.xml -- diff --git a/ant/pom.xml b/ant/pom.xml index a1f7921..9706572 100644 --- a/ant/pom.xml +++ b/ant/pom.xml @@ -53,6 +53,12 @@ org.apache.velocity velocity ${velocity.version} + + +commons-collections +commons-collections + + http://git-wip-us.apache.org/repos/asf/hive/blob/5a5e2490/common/pom.xml -- diff --git a/common/pom.xml b/common/pom.xml index cd14581..ee74282 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -91,7 +91,11 @@ ${hadoop.version} true - + +commons-collections +commons-collections + + org.slf4j slf4j-log4j12 http://git-wip-us.apache.org/repos/asf/hive/blob/5a5e2490/hplsql/pom.xml -- diff --git a/hplsql/pom.xml b/hplsql/pom.xml index 0aa647b..6329002 100644 --- a/hplsql/pom.xml +++ b/hplsql/pom.xml @@ -39,11 +39,6 @@ ${guava.version} -commons-collections -commons-collections -3.2.1 - - commons-cli commons-cli ${commons-cli.version} http://git-wip-us.apache.org/repos/asf/hive/blob/5a5e2490/pom.xml -- diff --git a/pom.xml b/pom.xml index c38c10f..34bdbf6 100644 --- a/pom.xml +++ b/pom.xml @@ -111,6 +111,7 @@ 3.2.9 1.2 1.4 +3.2.2 1.9 1.1 3.0.1 @@ -303,7 +304,12 @@ commons-codec ${commons-codec.version} - + +commons-collections +commons-collections +${commons-collections.version} + + commons-httpclient commons-httpclient ${commons-httpclient.version}
[57/91] [abbrv] hive git commit: HIVE-12489 : Analyze for partition fails if partition value has special characters (Thomas Friedrich via Ashutosh Chauhan)
HIVE-12489 : Analyze for partition fails if partition value has special characters (Thomas Friedrich via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0c1de975 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0c1de975 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0c1de975 Branch: refs/heads/spark Commit: 0c1de975f896d7a6e4a0f55ac0416dc75ea9c8e5 Parents: 2604cf2 Author: Thomas Friedrich Authored: Fri Nov 20 13:55:00 2015 -0800 Committer: Ashutosh Chauhan Committed: Mon Nov 23 11:46:19 2015 -0800 -- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 6 +-- .../queries/clientpositive/analyze_tbl_part.q | 12 + .../clientpositive/analyze_tbl_part.q.out | 52 3 files changed, 67 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/0c1de975/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 543bc0f..832a5bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -199,7 +199,7 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer { if (partColType.equals(serdeConstants.STRING_TYPE_NAME) || partColType.contains(serdeConstants.VARCHAR_TYPE_NAME) || partColType.contains(serdeConstants.CHAR_TYPE_NAME)) { - returnVal = "'" + partVal + "'"; + returnVal = "'" + escapeSQLString(partVal) + "'"; } else if (partColType.equals(serdeConstants.TINYINT_TYPE_NAME)) { returnVal = partVal+"Y"; } else if (partColType.equals(serdeConstants.SMALLINT_TYPE_NAME)) { @@ -212,10 +212,10 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer { returnVal = partVal + "BD"; } else if (partColType.equals(serdeConstants.DATE_TYPE_NAME) || partColType.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) { - returnVal = partColType + " '" + partVal + "'"; + returnVal = partColType + " '" + escapeSQLString(partVal) + "'"; } else { //for other usually not used types, just quote the value - returnVal = "'" + partVal + "'"; + returnVal = "'" + escapeSQLString(partVal) + "'"; } return returnVal; http://git-wip-us.apache.org/repos/asf/hive/blob/0c1de975/ql/src/test/queries/clientpositive/analyze_tbl_part.q -- diff --git a/ql/src/test/queries/clientpositive/analyze_tbl_part.q b/ql/src/test/queries/clientpositive/analyze_tbl_part.q index c9e45b6..ecf1389 100644 --- a/ql/src/test/queries/clientpositive/analyze_tbl_part.q +++ b/ql/src/test/queries/clientpositive/analyze_tbl_part.q @@ -15,3 +15,15 @@ ANALYZE TABLE src_stat_part partition (partitionId) COMPUTE STATISTICS for colum describe formatted src_stat_part.key PARTITION(partitionId=1); describe formatted src_stat_part.value PARTITION(partitionId=2); + +create table src_stat_string_part(key string, value string) partitioned by (partitionName string); + +insert overwrite table src_stat_string_part partition (partitionName="p'1") +select * from src1; + +insert overwrite table src_stat_string_part partition (partitionName="p\"1") +select * from src1; + +ANALYZE TABLE src_stat_string_part partition (partitionName="p'1") COMPUTE STATISTICS for columns key, value; + +ANALYZE TABLE src_stat_string_part partition (partitionName="p\"1") COMPUTE STATISTICS for columns key, value; \ No newline at end of 
file http://git-wip-us.apache.org/repos/asf/hive/blob/0c1de975/ql/src/test/results/clientpositive/analyze_tbl_part.q.out -- diff --git a/ql/src/test/results/clientpositive/analyze_tbl_part.q.out b/ql/src/test/results/clientpositive/analyze_tbl_part.q.out index 40b926c..464bdf7 100644 --- a/ql/src/test/results/clientpositive/analyze_tbl_part.q.out +++ b/ql/src/test/results/clientpositive/analyze_tbl_part.q.out @@ -81,3 +81,55 @@ POSTHOOK: Input: default@src_stat_part # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment value string 0
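The one-line changes in ColumnStatsSemanticAnalyzer matter because the analyzer rebuilds a textual query with the partition value spliced into a quoted literal, so a value such as p'1 (exercised by the new test) used to terminate the literal early and break the rewritten statement. A minimal sketch of the escaping idea, assuming backslash-style escaping in the spirit of Hive's escapeSQLString; the quote helper below is illustrative, not the actual implementation:

    final class PartValQuoting {
      /** Wrap a partition value in single quotes, escaping the characters
       *  that would otherwise end or corrupt the literal. */
      static String quote(String partVal) {
        StringBuilder sb = new StringBuilder("'");
        for (int i = 0; i < partVal.length(); i++) {
          char c = partVal.charAt(i);
          if (c == '\'' || c == '\\') {
            sb.append('\\');                 // escape quote and backslash
          }
          sb.append(c);
        }
        return sb.append('\'').toString();
      }

      public static void main(String[] args) {
        // p'1 becomes 'p\'1', so the generated predicate still parses
        System.out.println(quote("p'1"));
      }
    }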
[41/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/correlationoptimizer1.q.out -- diff --git a/ql/src/test/results/clientpositive/correlationoptimizer1.q.out b/ql/src/test/results/clientpositive/correlationoptimizer1.q.out index c5c9d9c..4a09600 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer1.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer1.q.out @@ -31,35 +31,35 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan -alias: y -Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +alias: x +Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 -Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan -alias: x -Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE +alias: y +Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 -Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -67,24 +67,20 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1 + outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator -expressions: _col1 (type: string) -outputColumnNames: _col0 + Group By Operator +aggregations: count(1) +keys: _col0 (type: string) +mode: hash +outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -186,35 +182,35 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan -alias:
[88/91] [abbrv] hive git commit: HIVE-12338: Add webui to HiveServer2 (Jimmy, reviewed by Mohit, Szehon, Lefty)
http://git-wip-us.apache.org/repos/asf/hive/blob/2c0c191c/service/src/resources/hive-webapps/static/css/bootstrap.min.css -- diff --git a/service/src/resources/hive-webapps/static/css/bootstrap.min.css b/service/src/resources/hive-webapps/static/css/bootstrap.min.css new file mode 100755 index 000..0f6fbcd --- /dev/null +++ b/service/src/resources/hive-webapps/static/css/bootstrap.min.css @@ -0,0 +1,9 @@ +/*! + * Bootstrap v3.0.0 + * + * Copyright 2013 Twitter, Inc + * Licensed under the Apache License v2.0 + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Designed and built with all the love in the world by @mdo and @fat. + *//*! normalize.css v2.1.0 | MIT License | git.io/normalize */article,aside,details,figcaption,figure,footer,header,hgroup,main,nav,section,summary{display:block}audio,canvas,video{display:inline-block}audio:not([controls]){display:none;height:0}[hidden]{display:none}html{font-family:sans-serif;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%}body{margin:0}a:focus{outline:thin dotted}a:active,a:hover{outline:0}h1{margin:.67em 0;font-size:2em}abbr[title]{border-bottom:1px dotted}b,strong{font-weight:bold}dfn{font-style:italic}hr{height:0;-moz-box-sizing:content-box;box-sizing:content-box}mark{color:#000;background:#ff0}code,kbd,pre,samp{font-family:monospace,serif;font-size:1em}pre{white-space:pre-wrap}q{quotes:"\201C" "\201D" "\2018" "\2019"}small{font-size:80%}sub,sup{position:relative;font-size:75%;line-height:0;vertical-align:baseline}sup{top:-0.5em}sub{bottom:-0.25em}img{border:0}svg:not(:root){overflow:hidden}figure{margin:0}fieldset{padding:.35em .625em .75em;margin:0 2px;border:1px solid #c0c0c0}legend{padding:0;border:0}button,input,select,textarea{margin:0;font-family:inherit;font-size:100%}button,input{line-height:normal}button,select{text-transform:none}button,html input[type="button"],input[type="reset"],input[type="submit"]{cursor:pointer;-webkit-appearance:button}button[disabled],html input[disabled]{cursor:default}input[type="checkbox"],input[type="radio"]{padding:0;box-sizing:border-box}input[type="search"]{-webkit-box-sizing:content-box;-moz-box-sizing:content-box;box-sizing:content-box;-webkit-appearance:textfield}input[type="search"]::-webkit-search-cancel-button,input[type="search"]::-webkit-search-decoration{-webkit-appearance:none}button::-moz-focus-inner,input::-moz-focus-inner{padding:0;border:0}textarea{overflow:auto;vertical-align:top}table{border-collapse:collapse;border-spacing:0}@media print{*{color:#000!important;text-shadow:none!important;background:transparent!important;box-shadow:none!important}a,a:visited{text-decorati on:underline}a[href]:after{content:" (" attr(href) ")"}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100%!important}@page{margin:2cm .5cm}p,h2,h3{orphans:3;widows:3}h2,h3{page-break-after:avoid}.navbar{display:none}.table td,.table th{background-color:#fff!important}.btn>.caret,.dropup>.btn>.caret{border-top-color:#000!important}.label{border:1px solid #000}.table{border-collapse:collapse!important}.table-bordered th,.table-bordered td{border:1px solid #ddd!important}}*,*:before,*:after{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}html{font-size:62.5%;-webkit-tap-highlight-color:rgba(0,0,0,0)}body{font-family:"Helvetica 
Neue",Helvetica,Arial,sans-serif;font-size:14px;line-height:1.428571429;color:#333;background-color:#fff}input,button,select ,textarea{font-family:inherit;font-size:inherit;line-height:inherit}button,input,select[multiple],textarea{background-image:none}a{color:#428bca;text-decoration:none}a:hover,a:focus{color:#2a6496;text-decoration:underline}a:focus{outline:thin dotted #333;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}img{vertical-align:middle}.img-responsive{display:block;height:auto;max-width:100%}.img-rounded{border-radius:6px}.img-thumbnail{display:inline-block;height:auto;max-width:100%;padding:4px;line-height:1.428571429;background-color:#fff;border:1px solid #ddd;border-radius:4px;-webkit-transition:all .2s ease-in-out;transition:all .2s ease-in-out}.img-circle{border-radius:50%}hr{margin-top:20px;margin-bottom:20px;border:0;border-top:1px solid #eee}.sr-only{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0 0 0 0);border:0}p{margin:0 0 10px}.lead{margin-bottom:20px;font-size:16.098px;font-weight:200;line-height:1.4}@media(min-w idth:768px){.lead{font-size:21px}}small{font-size:85%}cite{font-style:normal}.text-muted{color:#999}.text-primary{color:#428bca}.text-warning{color:#c09853}.text-danger{color:#b94a48}.text-success{color:#468847}.text-info{color:#3a87ad}.text-left{text-ali
[27/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/llap/explainuser_1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 4b8ec3f..a9b439e 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -3396,7 +3396,7 @@ Stage-0 Merge Join Operator [MERGEJOIN_37] | condition map:[{"":"Inner Join 0 to 1"}] | keys:{"0":"_col1 (type: int)","1":"_col0 (type: int)"} - | outputColumnNames:["_col1","_col2"] + | outputColumnNames:["_col2","_col4"] | Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE |<-Reducer 2 [SIMPLE_EDGE] | Reduce Output Operator [RS_22] @@ -3977,112 +3977,115 @@ Stage-0 limit:-1 Stage-1 Reducer 4 - File Output Operator [FS_38] + File Output Operator [FS_37] compressed:false -Statistics:Num rows: 1 Data size: 146 Basic stats: COMPLETE Column stats: NONE +Statistics:Num rows: 15 Data size: 1966 Basic stats: COMPLETE Column stats: NONE table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} -Select Operator [SEL_37] +Select Operator [SEL_36] | outputColumnNames:["_col0","_col1"] -| Statistics:Num rows: 1 Data size: 146 Basic stats: COMPLETE Column stats: NONE +| Statistics:Num rows: 15 Data size: 1966 Basic stats: COMPLETE Column stats: NONE |<-Reducer 3 [SIMPLE_EDGE] - Reduce Output Operator [RS_36] + Reduce Output Operator [RS_35] key expressions:_col0 (type: string) sort order:+ - Statistics:Num rows: 1 Data size: 146 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 15 Data size: 1966 Basic stats: COMPLETE Column stats: NONE value expressions:_col1 (type: int) - Merge Join Operator [MERGEJOIN_48] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{} - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 1 Data size: 146 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 2 [SIMPLE_EDGE] - | Reduce Output Operator [RS_30] - | sort order: - | Statistics:Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col0 (type: string), _col1 (type: int) - | Filter Operator [FIL_41] - |predicate:_col2 is null (type: boolean) - |Statistics:Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE - |Merge Join Operator [MERGEJOIN_47] - || condition map:[{"":"Left Outer Join0 to 1"}] - || keys:{"0":"UDFToDouble(_col1) (type: double)","1":"_col0 (type: double)"} - || outputColumnNames:["_col0","_col1","_col2"] - || Statistics:Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE - ||<-Map 1 [SIMPLE_EDGE] - || Reduce Output Operator [RS_27] - || key expressions:UDFToDouble(_col1) (type: double) - || Map-reduce partition columns:UDFToDouble(_col1) (type: double) - || sort order:+ - || Statistics:Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - || value expressions:_col0 (type: string), _col1 (type: int) - || Select Operator [SEL_2] - ||outputColumnNames:["_col0","_col1"] - ||Statistics:Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - ||TableScan [TS_0] - || alias:part - || Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE - ||<-Reducer 6 [SIMPLE_EDGE] - | Reduce Output Operator [RS_28] - | key expressions:_col0 
(type: double) - | Map-reduce partition colum
[32/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/join_star.q.out -- diff --git a/ql/src/test/results/clientpositive/join_star.q.out b/ql/src/test/results/clientpositive/join_star.q.out index a75b48d..09db1f6 100644 --- a/ql/src/test/results/clientpositive/join_star.q.out +++ b/ql/src/test/results/clientpositive/join_star.q.out @@ -139,21 +139,25 @@ STAGE PLANS: Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: -dim1 +$hdt$_1:dim1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: -dim1 +$hdt$_1:dim1 TableScan alias: dim1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: f1 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator -keys: - 0 d1 (type: int) - 1 f1 (type: int) + Select Operator +expressions: f1 (type: int), f2 (type: int) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +HashTable Sink Operator + keys: +0 _col2 (type: int) +1 _col0 (type: int) Stage: Stage-3 Map Reduce @@ -164,25 +168,29 @@ STAGE PLANS: Filter Operator predicate: d1 is not null (type: boolean) Statistics: Num rows: 4 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Map Join Operator -condition map: - Inner Join 0 to 1 -keys: - 0 d1 (type: int) - 1 f1 (type: int) -outputColumnNames: _col0, _col1, _col8 -Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE -Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int) - outputColumnNames: _col0, _col1, _col2 + Select Operator +expressions: m1 (type: int), m2 (type: int), d1 (type: int) +outputColumnNames: _col0, _col1, _col2 +Statistics: Num rows: 4 Data size: 49 Basic stats: COMPLETE Column stats: NONE +Map Join Operator + condition map: + Inner Join 0 to 1 + keys: +0 _col2 (type: int) +1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col4 Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false + Select Operator +expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int) +outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE -table: -input format: org.apache.hadoop.mapred.TextInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -225,35 +233,43 @@ STAGE PLANS: Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: -dim1 +$hdt$_1:dim1 Fetch Operator limit: -1 -dim2 +$hdt$_2:dim2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: -dim1 +$hdt$_1:dim1 TableScan alias: dim1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: f1 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator -keys: - 0 d1 (type: int) - 1 f1 (type: int) -
[72/91] [abbrv] hive git commit: HIVE-12463: VectorMapJoinFastKeyStore has Array OOB errors (Gopal V, reviewed by Sergey Shelukhin)
HIVE-12463: VectorMapJoinFastKeyStore has Array OOB errors (Gopal V, reviewed by Sergey Shelukhin) Signed-off-by: Gopal V Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f1803799 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f1803799 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f1803799 Branch: refs/heads/spark Commit: f180379955bce04e81c4f799f16d184d74f7ef4c Parents: 6325122 Author: Gopal V Authored: Wed Nov 25 01:01:15 2015 -0800 Committer: Gopal V Committed: Wed Nov 25 01:01:15 2015 -0800 -- .../mapjoin/fast/VectorMapJoinFastKeyStore.java | 17 ++--- .../apache/hadoop/hive/serde2/WriteBuffers.java | 69 ++-- 2 files changed, 38 insertions(+), 48 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/f1803799/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java index 58af4eb..efdcd43 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java @@ -30,7 +30,6 @@ public class VectorMapJoinFastKeyStore { private WriteBuffers writeBuffers; - private WriteBuffers.ByteSegmentRef byteSegmentRef; private WriteBuffers.Position readPos; /** @@ -141,17 +140,11 @@ public class VectorMapJoinFastKeyStore { } // Our reading is positioned to the key. -writeBuffers.getByteSegmentRefToCurrent(byteSegmentRef, keyLength, readPos); - -byte[] currentBytes = byteSegmentRef.getBytes(); -int currentStart = (int) byteSegmentRef.getOffset(); - -for (int i = 0; i < keyLength; i++) { - if (currentBytes[currentStart + i] != keyBytes[keyStart + i]) { -// LOG.debug("VectorMapJoinFastKeyStore equalKey no match on bytes"); -return false; - } +if (!writeBuffers.isEqual(keyBytes, keyStart, readPos, keyLength)) { + // LOG.debug("VectorMapJoinFastKeyStore equalKey no match on bytes"); + return false; } + // LOG.debug("VectorMapJoinFastKeyStore equalKey match on bytes"); return true; } @@ -159,7 +152,6 @@ public class VectorMapJoinFastKeyStore { public VectorMapJoinFastKeyStore(int writeBuffersSize) { writeBuffers = new WriteBuffers(writeBuffersSize, AbsoluteKeyOffset.maxSize); -byteSegmentRef = new WriteBuffers.ByteSegmentRef(); readPos = new WriteBuffers.Position(); } @@ -167,7 +159,6 @@ public class VectorMapJoinFastKeyStore { // TODO: Check if maximum size compatible with AbsoluteKeyOffset.maxSize. this.writeBuffers = writeBuffers; -byteSegmentRef = new WriteBuffers.ByteSegmentRef(); readPos = new WriteBuffers.Position(); } } http://git-wip-us.apache.org/repos/asf/hive/blob/f1803799/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java -- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java b/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java index b47456e..5900428 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java @@ -282,32 +282,33 @@ public final class WriteBuffers implements RandomAccessOutput { return true; } - /** - * Compares part of the buffer with a part of an external byte array. - * Does not modify readPoint. 
- */ - public boolean isEqual(byte[] left, int leftLength, long rightOffset, int rightLength) { -if (rightLength != leftLength) { - return false; + private final boolean isEqual(byte[] left, int leftOffset, int rightIndex, int rightFrom, int length) { +if (length == 0) { + return true; } -int rightIndex = getBufferIndex(rightOffset), rightFrom = getOffset(rightOffset); +// invariant: rightLength = leftLength +// rightOffset is within the buffers byte[] rightBuffer = writeBuffers.get(rightIndex); -if (rightFrom + rightLength <= wbSize) { +if (rightFrom + length <= wbSize) { // TODO: allow using unsafe optionally. - for (int i = 0; i < leftLength; ++i) { -if (left[i] != rightBuffer[rightFrom + i]) { + // bounds check first, to trigger bugs whether the first byte matches or not + if (left[leftOffset + length - 1] != rightBuffer[rightFrom + length - 1]) { +return false; + } + for (int i = 0; i < length; ++i) { +if (left[leftOffset +
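The WriteBuffers rewrite above folds the key comparison into the buffer class itself and, notably, probes the last byte before the main loop, so a bad offset computation faults at the array bound immediately instead of silently matching a prefix. A compact sketch of that bounds-check-first pattern for the common case where the stored key does not wrap across buffers (the multi-buffer path handled by the real patch is omitted here):

    final class SegmentedEquals {
      /** Compare length bytes of an external key against one write buffer. */
      static boolean isEqual(byte[] left, int leftOffset,
                             byte[] rightBuffer, int rightFrom, int length) {
        if (length == 0) {
          return true;
        }
        // Bounds check first: touching the last byte up front turns an offset
        // bug into an immediate ArrayIndexOutOfBoundsException rather than a
        // comparison that only inspects whichever bytes happen to be in range.
        if (left[leftOffset + length - 1] != rightBuffer[rightFrom + length - 1]) {
          return false;
        }
        for (int i = 0; i < length; ++i) {
          if (left[leftOffset + i] != rightBuffer[rightFrom + i]) {
            return false;
          }
        }
        return true;
      }
    }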
[16/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/skewjoinopt1.q.out -- diff --git a/ql/src/test/results/clientpositive/skewjoinopt1.q.out b/ql/src/test/results/clientpositive/skewjoinopt1.q.out index f3aa0f7..b863188 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt1.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt1.q.out @@ -62,43 +62,47 @@ STAGE PLANS: Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: string) -sort order: + -Map-reduce partition columns: key (type: string) + Select Operator +expressions: key (type: string), val (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE -value expressions: val (type: string) +Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: b Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: string) -sort order: + -Map-reduce partition columns: key (type: string) + Select Operator +expressions: key (type: string), val (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE -value expressions: val (type: string) +Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: -0 key (type: string) -1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 +0 _col0 (type: string) +1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Select Operator -expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) -outputColumnNames: _col0, _col1, _col2, _col3 -Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator +compressed: false +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -133,40 +137,44 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: string) -sort order: + -Map-reduce partition columns: key (type: string) + Select Operator
+expressions: key (type: string), val (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE -value expressions: val (type: string) +
[23/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/louter_join_ppr.q.out -- diff --git a/ql/src/test/results/clientpositive/louter_join_ppr.q.out b/ql/src/test/results/clientpositive/louter_join_ppr.q.out index 65fe291..cb2edde 100644 --- a/ql/src/test/results/clientpositive/louter_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/louter_join_ppr.q.out @@ -968,42 +968,42 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan -alias: b -Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +alias: a +Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + predicate: (((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 -Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false TableScan -alias: a -Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +alias: b +Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key) > 15.0)) and (UDFToDouble(key) < 25.0)) and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + predicate: (((((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) and (UDFToDouble(key) > 10.0)) and (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 -Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string) auto parallelism: false @@ -1147,9 +1147,9 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: -/src [$hdt$_1:$hdt$_1:a] -/srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:b] -/srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:b] +/src [$hdt$_0:$hdt$_0:a] +/srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:b]
+/srcpart/ds=2008-04-08/hr=12 [$hdt$_1:$hdt$_1:b] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -1158,34 +1158,30 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE -
[42/91] [abbrv] [partial] hive git commit: HIVE-12017: Do not disable CBO by default when number of joins in a query is equal or less than 1 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/7dab21ac/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out -- diff --git a/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out b/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out index 41f3d09..09b981b 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out @@ -461,26 +461,26 @@ PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: database:default PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"mr","database":"default","hash":"386791c174a4999fc916e300b5e76bf2","queryText":"create table dest2 as select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"386791c174a4999fc916e300b5e76bf2","queryText":"create table dest2 as select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"mr","database":"default","hash":"e494b771d94800dc3430bf5d0810cd9f","queryText":"insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = 
src2.key2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN", "vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"e494b771d94800dc3430bf5d0810cd9f","queryText":"insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[],"vertices":[]} PREHOOK: query: insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"mr","database":"default","hash":"efeaddd0d36105b1013b414627850dc2","queryText":"insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4,6],"targ
[87/91] [abbrv] hive git commit: HIVE-12338: Add webui to HiveServer2 (Jimmy, reviewed by Mohit, Szehon, Lefty)
http://git-wip-us.apache.org/repos/asf/hive/blob/2c0c191c/service/src/resources/hive-webapps/static/css/hive.css -- diff --git a/service/src/resources/hive-webapps/static/css/hive.css b/service/src/resources/hive-webapps/static/css/hive.css new file mode 100644 index 000..b8c9f54 --- /dev/null +++ b/service/src/resources/hive-webapps/static/css/hive.css @@ -0,0 +1,24 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* General styling */ +body { padding-top: 60px; } +.logo img { float: right; } +.inner_header { margin-bottom: 1em; } +section { margin-bottom: 3em; } + http://git-wip-us.apache.org/repos/asf/hive/blob/2c0c191c/service/src/resources/hive-webapps/static/fonts/glyphicons-halflings-regular.eot -- diff --git a/service/src/resources/hive-webapps/static/fonts/glyphicons-halflings-regular.eot b/service/src/resources/hive-webapps/static/fonts/glyphicons-halflings-regular.eot new file mode 100755 index 000..87eaa43 Binary files /dev/null and b/service/src/resources/hive-webapps/static/fonts/glyphicons-halflings-regular.eot differ http://git-wip-us.apache.org/repos/asf/hive/blob/2c0c191c/service/src/resources/hive-webapps/static/fonts/glyphicons-halflings-regular.svg -- diff --git a/service/src/resources/hive-webapps/static/fonts/glyphicons-halflings-regular.svg b/service/src/resources/hive-webapps/static/fonts/glyphicons-halflings-regular.svg new file mode 100755 index 000..5fee068 --- /dev/null +++ b/service/src/resources/hive-webapps/static/fonts/glyphicons-halflings-regular.svg @@ -0,0 +1,228 @@ [228 lines of SVG font markup — the XML prologue, a DOCTYPE referencing http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd, an svg root in the http://www.w3.org/2000/svg namespace, and the glyph definitions — were stripped to bare '+' diff markers by the mail archive and are elided here] \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/2c0c191c/service/src/resources/hive-webapps/static/fonts/glyphicons-halflings-regular.ttf -- diff --git a/service/src/resources/hive-webapps/static/fonts/glyphicons-halflings-regular.ttf b/service/src/resources/hive-webapps/static/fonts/glyphicons-halflings-regular.ttf new file mode 100755 index 000..be784dc Binary files /dev/null and b/service/src/resources/hive-webapps/static/fonts/glyphicons-halflings-regular.ttf differ http://git-wip-us.apache.org/repos/asf/hive/blob/2c0c191c/service/src/resources/hive-webapps/static/fonts/glyphicons-halflings-regular.woff -- diff --git
a/service/src/resources/hive-webapps/static/fonts/glyphicons-halflings-regular.woff b/service/src/resources/hive-webapps/static/fonts/glyphicons-halflings-regular.woff new file mode 100755 index 000..2cc3e48 Binary files /dev/null and b/service/src/resources/hive-webapps/static/fonts/glyphicons-halflings-regular.woff differ http://git-wip-us.apache.org/repos/asf/hive/blob/2c0c191c/service/src/resources/hive-webapps/static/hive_logo.jpeg -- diff --git a/service/src/resources/hive-webapps/static/hive_logo.jpeg b/service/src/resources/hive-webapps/static/hive_logo.jpeg new file mode 100644 index 000..8c4a5df Binary files /dev/null and b/service/src/resources/hive-webapps/static/hive_logo.jpeg differ http://git-wip-us.apache.org/repos/asf/hive/blob/2c0c191c/spark-client/pom.xml -- diff --git a/spark-client/pom.xml b/spark-client/pom.xml index 9d2b418..0405016 100644 --- a/spark-client/pom.xml +++ b/spark-client/pom.xml @@ -54,6 +54,12 @@ org.apache.h
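The assets above (hive.css, the glyphicon fonts, hive_logo.jpeg) are the static half of the new HiveServer2 web UI that HIVE-12338 introduces. As a generic, hedged sketch of how such a UI serves a static resource tree — this is plain embedded-Jetty boilerplate, not Hive's actual HttpServer wiring, and the port and resource path below are assumptions for illustration:

import org.eclipse.jetty.server.Server;
import org.eclipse.jetty.server.handler.ResourceHandler;

/** Generic embedded-Jetty sketch; not HiveServer2's actual web UI wiring. */
public final class StaticUiSketch {
  public static void main(String[] args) throws Exception {
    // 10002 is assumed here as the web UI port (hive.server2.webui.port's default).
    Server server = new Server(10002);

    ResourceHandler staticFiles = new ResourceHandler();
    staticFiles.setDirectoriesListed(false);
    // Hypothetical path: the commit above lands its assets under hive-webapps/static/.
    staticFiles.setResourceBase("service/src/resources/hive-webapps");

    server.setHandler(staticFiles);
    server.start();
    server.join();
  }
}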