http://git-wip-us.apache.org/repos/asf/hive/blob/2509e2fa/ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out
index d303900..49ff635 100644
--- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out
+++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out
@@ -1,687 +1,388 @@
->>> set hive.optimize.bucketmapjoin = true;
-No rows affected
->>> set hive.optimize.bucketmapjoin.sortedmerge = true;
-No rows affected
->>> set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
-No rows affected
->>> set hive.cbo.enable=false;
-No rows affected
->>>
->>> set hive.exec.reducers.max = 1;
-No rows affected
->>> set hive.merge.mapfiles=false;
-No rows affected
->>> set hive.merge.mapredfiles=false;
-No rows affected
->>>
->>> -- This test verifies that the sort merge join optimizer works when the tables are joined on columns with different names
->>>
->>> -- Create bucketed and sorted tables
->>> CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC) INTO 16 BUCKETS;
-INFO : Compiling commandqueryId=(!!{queryId}!!): CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC) INTO 16 BUCKETS
-INFO : Semantic Analysis Completed
-INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null)
-INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : Executing commandqueryId=(!!{queryId}!!): CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC) INTO 16 BUCKETS
-INFO : PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC) INTO 16 BUCKETS
-INFO : PREHOOK: type: CREATETABLE
-INFO : PREHOOK: Output: database:smb_mapjoin_13
-INFO : PREHOOK: Output: smb_mapjoin_13@test_table1
-INFO : Starting task [Stage-0:DDL] in serial mode
-INFO : POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC) INTO 16 BUCKETS
-INFO : POSTHOOK: type: CREATETABLE
-INFO : POSTHOOK: Output: database:smb_mapjoin_13
-INFO : POSTHOOK: Output: smb_mapjoin_13@test_table1
-INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : OK
-DEBUG : Shutting down query CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC) INTO 16 BUCKETS
-No rows affected
->>> CREATE TABLE test_table2 (value INT, key STRING) CLUSTERED BY (value) SORTED BY (value ASC) INTO 16 BUCKETS;
-INFO : Compiling commandqueryId=(!!{queryId}!!): CREATE TABLE test_table2 (value INT, key STRING) CLUSTERED BY (value) SORTED BY (value ASC) INTO 16 BUCKETS
-INFO : Semantic Analysis Completed
-INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null)
-INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : Executing commandqueryId=(!!{queryId}!!): CREATE TABLE test_table2 (value INT, key STRING) CLUSTERED BY (value) SORTED BY (value ASC) INTO 16 BUCKETS
-INFO : PREHOOK: query: CREATE TABLE test_table2 (value INT, key STRING) CLUSTERED BY (value) SORTED BY (value ASC) INTO 16 BUCKETS
-INFO : PREHOOK: type: CREATETABLE
-INFO : PREHOOK: Output: database:smb_mapjoin_13
-INFO : PREHOOK: Output: smb_mapjoin_13@test_table2
-INFO : Starting task [Stage-0:DDL] in serial mode
-INFO : POSTHOOK: query: CREATE TABLE test_table2 (value INT, key STRING) CLUSTERED BY (value) SORTED BY (value ASC) INTO 16 BUCKETS
-INFO : POSTHOOK: type: CREATETABLE
-INFO : POSTHOOK: Output: database:smb_mapjoin_13
-INFO : POSTHOOK: Output: smb_mapjoin_13@test_table2
-INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : OK
-DEBUG : Shutting down query CREATE TABLE test_table2 (value INT, key STRING) CLUSTERED BY (value) SORTED BY (value ASC) INTO 16 BUCKETS
-No rows affected
->>> CREATE TABLE test_table3 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS;
-INFO : Compiling commandqueryId=(!!{queryId}!!): CREATE TABLE test_table3 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS
-INFO : Semantic Analysis Completed
-INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null)
-INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : Executing commandqueryId=(!!{queryId}!!): CREATE TABLE test_table3 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS
-INFO : PREHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS
-INFO : PREHOOK: type: CREATETABLE
-INFO : PREHOOK: Output: database:smb_mapjoin_13
-INFO : PREHOOK: Output: smb_mapjoin_13@test_table3
-INFO : Starting task [Stage-0:DDL] in serial mode
-INFO : POSTHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS
-INFO : POSTHOOK: type: CREATETABLE
-INFO : POSTHOOK: Output: database:smb_mapjoin_13
-INFO : POSTHOOK: Output: smb_mapjoin_13@test_table3
-INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : OK
-DEBUG : Shutting down query CREATE TABLE test_table3 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS
-No rows affected
->>> CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (value ASC, key ASC) INTO 16 BUCKETS;
-INFO : Compiling commandqueryId=(!!{queryId}!!): CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (value ASC, key ASC) INTO 16 BUCKETS
-INFO : Semantic Analysis Completed
-INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null)
-INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : Executing commandqueryId=(!!{queryId}!!): CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (value ASC, key ASC) INTO 16 BUCKETS
-INFO : PREHOOK: query: CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (value ASC, key ASC) INTO 16 BUCKETS
-INFO : PREHOOK: type: CREATETABLE
-INFO : PREHOOK: Output: database:smb_mapjoin_13
-INFO : PREHOOK: Output: smb_mapjoin_13@test_table4
-INFO : Starting task [Stage-0:DDL] in serial mode
-INFO : POSTHOOK: query: CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (value ASC, key ASC) INTO 16 BUCKETS
-INFO : POSTHOOK: type: CREATETABLE
-INFO : POSTHOOK: Output: database:smb_mapjoin_13
-INFO : POSTHOOK: Output: smb_mapjoin_13@test_table4
-INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : OK
-DEBUG : Shutting down query CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (value ASC, key ASC) INTO 16 BUCKETS
-No rows affected
->>>
->>> FROM default.src
-INSERT OVERWRITE TABLE test_table1 SELECT *
-INSERT OVERWRITE TABLE test_table2 SELECT *
-INSERT OVERWRITE TABLE test_table3 SELECT *
-INSERT OVERWRITE TABLE test_table4 SELECT *;
-INFO : Compiling commandqueryId=(!!{queryId}!!): FROM default.src
-INSERT OVERWRITE TABLE test_table1 SELECT *
-INSERT OVERWRITE TABLE test_table2 SELECT *
-INSERT OVERWRITE TABLE test_table3 SELECT *
-INSERT OVERWRITE TABLE test_table4 SELECT *
-INFO : Semantic Analysis Completed
-INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:_col0, type:int, comment:null), FieldSchema(name:_col1, type:string, comment:null)], properties:null)
-INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : Executing commandqueryId=(!!{queryId}!!): FROM default.src
-INSERT OVERWRITE TABLE test_table1 SELECT *
-INSERT OVERWRITE TABLE test_table2 SELECT *
-INSERT OVERWRITE TABLE test_table3 SELECT *
-INSERT OVERWRITE TABLE test_table4 SELECT *
-INFO : PREHOOK: query: FROM default.src
+PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC) INTO 16 BUCKETS
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_table1
+POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC) INTO 16 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (value INT, key STRING) CLUSTERED BY (value) SORTED BY (value ASC) INTO 16 BUCKETS
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_table2
+POSTHOOK: query: CREATE TABLE test_table2 (value INT, key STRING) CLUSTERED BY (value) SORTED BY (value ASC) INTO 16 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_table3
+POSTHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_table3
+PREHOOK: query: CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (value ASC, key ASC) INTO 16 BUCKETS
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_table4
+POSTHOOK: query: CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (value ASC, key ASC) INTO 16 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_table4
+PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE test_table1 SELECT *
 INSERT OVERWRITE TABLE test_table2 SELECT *
 INSERT OVERWRITE TABLE test_table3 SELECT *
 INSERT OVERWRITE TABLE test_table4 SELECT *
-INFO : PREHOOK: type: QUERY
-INFO : PREHOOK: Input: default@src
-INFO : PREHOOK: Output: smb_mapjoin_13@test_table1
-INFO : PREHOOK: Output: smb_mapjoin_13@test_table2
-INFO : PREHOOK: Output: smb_mapjoin_13@test_table3
-INFO : PREHOOK: Output: smb_mapjoin_13@test_table4
-WARN : Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
-INFO : Query ID = !!{queryId}!!
-INFO : Total jobs = 4
-INFO : Launching Job 1 out of 4
-INFO : Starting task [Stage-4:MAPRED] in serial mode
-INFO : Number of reduce tasks determined at compile time: 1
-INFO : In order to change the average load for a reducer (in bytes):
-INFO : set hive.exec.reducers.bytes.per.reducer=<number>
-INFO : In order to limit the maximum number of reducers:
-INFO : set hive.exec.reducers.max=<number>
-INFO : In order to set a constant number of reducers:
-INFO : set mapreduce.job.reduces=<number>
-DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir
-DEBUG : adding the following namenodes' delegation tokens:[file:///]
-DEBUG : Creating splits at file:/!!ELIDED!!
-INFO : number of splits:1
-INFO : Submitting tokens for job: !!{jobId}}!!
-INFO : The url to track the job: http://localhost:8080/
-INFO : Job running in-process (local Hadoop)
-INFO : Ended Job = !!{jobId}!!
-INFO : Starting task [Stage-0:MOVE] in serial mode
-INFO : Loading data to table smb_mapjoin_13.test_table1 from file:/!!ELIDED!!
-INFO : Launching Job 2 out of 4
-INFO : Starting task [Stage-6:MAPRED] in serial mode
-INFO : Number of reduce tasks determined at compile time: 1
-INFO : In order to change the average load for a reducer (in bytes):
-INFO : set hive.exec.reducers.bytes.per.reducer=<number>
-INFO : In order to limit the maximum number of reducers:
-INFO : set hive.exec.reducers.max=<number>
-INFO : In order to set a constant number of reducers:
-INFO : set mapreduce.job.reduces=<number>
-DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir
-DEBUG : adding the following namenodes' delegation tokens:[file:///]
-DEBUG : Creating splits at file:/!!ELIDED!!
-INFO : number of splits:1
-INFO : Submitting tokens for job: !!{jobId}}!!
-INFO : The url to track the job: http://localhost:8080/
-INFO : Job running in-process (local Hadoop)
-INFO : Ended Job = !!{jobId}!!
-INFO : Launching Job 3 out of 4
-INFO : Starting task [Stage-8:MAPRED] in serial mode
-INFO : Number of reduce tasks determined at compile time: 1
-INFO : In order to change the average load for a reducer (in bytes):
-INFO : set hive.exec.reducers.bytes.per.reducer=<number>
-INFO : In order to limit the maximum number of reducers:
-INFO : set hive.exec.reducers.max=<number>
-INFO : In order to set a constant number of reducers:
-INFO : set mapreduce.job.reduces=<number>
-DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir
-DEBUG : adding the following namenodes' delegation tokens:[file:///]
-DEBUG : Creating splits at file:/!!ELIDED!!
-INFO : number of splits:1
-INFO : Submitting tokens for job: !!{jobId}}!!
-INFO : The url to track the job: http://localhost:8080/
-INFO : Job running in-process (local Hadoop)
-INFO : Ended Job = !!{jobId}!!
-INFO : Launching Job 4 out of 4
-INFO : Starting task [Stage-10:MAPRED] in serial mode
-INFO : Number of reduce tasks determined at compile time: 1
-INFO : In order to change the average load for a reducer (in bytes):
-INFO : set hive.exec.reducers.bytes.per.reducer=<number>
-INFO : In order to limit the maximum number of reducers:
-INFO : set hive.exec.reducers.max=<number>
-INFO : In order to set a constant number of reducers:
-INFO : set mapreduce.job.reduces=<number>
-DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir
-DEBUG : adding the following namenodes' delegation tokens:[file:///]
-DEBUG : Creating splits at file:/!!ELIDED!!
-INFO : number of splits:1
-INFO : Submitting tokens for job: !!{jobId}}!!
-INFO : The url to track the job: http://localhost:8080/
-INFO : Job running in-process (local Hadoop)
-INFO : Ended Job = !!{jobId}!!
-INFO : Starting task [Stage-5:STATS] in serial mode
-INFO : Starting task [Stage-1:MOVE] in serial mode
-INFO : Loading data to table smb_mapjoin_13.test_table2 from file:/!!ELIDED!!
-INFO : Starting task [Stage-2:MOVE] in serial mode
-INFO : Loading data to table smb_mapjoin_13.test_table3 from file:/!!ELIDED!!
-INFO : Starting task [Stage-3:MOVE] in serial mode
-INFO : Loading data to table smb_mapjoin_13.test_table4 from file:/!!ELIDED!!
-INFO : Starting task [Stage-7:STATS] in serial mode
-INFO : Starting task [Stage-9:STATS] in serial mode
-INFO : Starting task [Stage-11:STATS] in serial mode
-INFO : POSTHOOK: query: FROM default.src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1
+PREHOOK: Output: default@test_table2
+PREHOOK: Output: default@test_table3
+PREHOOK: Output: default@test_table4
+POSTHOOK: query: FROM src
 INSERT OVERWRITE TABLE test_table1 SELECT *
 INSERT OVERWRITE TABLE test_table2 SELECT *
 INSERT OVERWRITE TABLE test_table3 SELECT *
 INSERT OVERWRITE TABLE test_table4 SELECT *
-INFO : POSTHOOK: type: QUERY
-INFO : POSTHOOK: Input: default@src
-INFO : POSTHOOK: Output: smb_mapjoin_13@test_table1
-INFO : POSTHOOK: Output: smb_mapjoin_13@test_table2
-INFO : POSTHOOK: Output: smb_mapjoin_13@test_table3
-INFO : POSTHOOK: Output: smb_mapjoin_13@test_table4
-INFO : POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-INFO : POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-INFO : POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-INFO : POSTHOOK: Lineage: test_table2.value EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-INFO : POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-INFO : POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-INFO : POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-INFO : POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-INFO : MapReduce Jobs Launched:
-INFO : Stage-Stage-4: HDFS Read: 0 HDFS Write: 0 SUCCESS
-INFO : Stage-Stage-6: HDFS Read: 0 HDFS Write: 0 SUCCESS
-INFO : Stage-Stage-8: HDFS Read: 0 HDFS Write: 0 SUCCESS
-INFO : Stage-Stage-10: HDFS Read: 0 HDFS Write: 0 SUCCESS
-INFO : Total MapReduce CPU Time Spent: 0 msec
-INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : OK
-DEBUG : Shutting down query FROM default.src
-INSERT OVERWRITE TABLE test_table1 SELECT *
-INSERT OVERWRITE TABLE test_table2 SELECT *
-INSERT OVERWRITE TABLE test_table3 SELECT *
-INSERT OVERWRITE TABLE test_table4 SELECT *
-No rows affected
->>>
->>> -- Join data from 2 tables on their respective sorted columns (one each, with different names) and
->>> -- verify sort merge join is used
->>> EXPLAIN EXTENDED
-SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10;
-INFO : Compiling commandqueryId=(!!{queryId}!!): EXPLAIN EXTENDED
-SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
-INFO : Semantic Analysis Completed
-INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:Explain, type:string, comment:null)], properties:null)
-INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : Executing commandqueryId=(!!{queryId}!!): EXPLAIN EXTENDED
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Output: default@test_table3
+POSTHOOK: Output: default@test_table4
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: EXPLAIN EXTENDED
 SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
-INFO : PREHOOK: query: EXPLAIN EXTENDED
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
 SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
-INFO : PREHOOK: type: QUERY
-INFO : Starting task [Stage-3:EXPLAIN] in serial mode
-INFO : POSTHOOK: query: EXPLAIN EXTENDED
-SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
-INFO : POSTHOOK: type: QUERY
-INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : OK
-DEBUG : Shutting down query EXPLAIN EXTENDED
-SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
-'Explain'
-'STAGE DEPENDENCIES:'
-'  Stage-1 is a root stage'
-'  Stage-0 depends on stages: Stage-1'
-''
-'STAGE PLANS:'
-'  Stage: Stage-1'
-'    Map Reduce'
-'      Map Operator Tree:'
-'          TableScan'
-'            alias: a'
-'            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE'
-'            GatherStats: false'
-'            Filter Operator'
-'              isSamplingPred: false'
-'              predicate: key is not null (type: boolean)'
-'              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE'
-'              Sorted Merge Bucket Map Join Operator'
-'                condition map:'
-'                     Inner Join 0 to 1'
-'                keys:'
-'                  0 key (type: int)'
-'                  1 value (type: int)'
-'                outputColumnNames: _col0, _col1, _col5, _col6'
-'                Position of Big Table: 0'
-'                BucketMapJoin: true'
-'                Select Operator'
-'                  expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)'
-'                  outputColumnNames: _col0, _col1, _col2, _col3'
-'                  Reduce Output Operator'
-'                    key expressions: _col0 (type: int)'
-'                    null sort order: a'
-'                    sort order: +'
-'                    tag: -1'
-'                    TopN: 10'
-'                    TopN Hash Memory Usage: 0.1'
-'                    value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string)'
-'                    auto parallelism: false'
-'      Path -> Alias:'
-'        file:/!!ELIDED!! [a]'
-'      Path -> Partition:'
-'        file:/!!ELIDED!! '
-'          Partition'
-'            base file name: test_table1'
-'            input format: org.apache.hadoop.mapred.TextInputFormat'
-'            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-'            properties:'
-'              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}'
-'              SORTBUCKETCOLSPREFIX TRUE'
-'              bucket_count 16'
-'              bucket_field_name key'
-'              column.name.delimiter ,'
-'              columns key,value'
-'              columns.comments '
-'              columns.types int:string'
-'              file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-'              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-'              location file:/!!ELIDED!!
-'              name smb_mapjoin_13.test_table1'
-'              numFiles 16'
-'              numRows 500'
-'              rawDataSize 5312'
-'              serialization.ddl struct test_table1 { i32 key, string value}'
-'              serialization.format 1'
-'              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-'              totalSize 5812'
-'              transient_lastDdlTime !!UNIXTIME!!'
-'            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-'          '
-'              input format: org.apache.hadoop.mapred.TextInputFormat'
-'              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-'              properties:'
-'                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}'
-'                SORTBUCKETCOLSPREFIX TRUE'
-'                bucket_count 16'
-'                bucket_field_name key'
-'                column.name.delimiter ,'
-'                columns key,value'
-'                columns.comments '
-'                columns.types int:string'
-'                file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-'                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-'                location file:/!!ELIDED!!
-'                name smb_mapjoin_13.test_table1'
-'                numFiles 16'
-'                numRows 500'
-'                rawDataSize 5312'
-'                serialization.ddl struct test_table1 { i32 key, string value}'
-'                serialization.format 1'
-'                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-'                totalSize 5812'
-'                transient_lastDdlTime !!UNIXTIME!!'
-'              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-'            name: smb_mapjoin_13.test_table1'
-'            name: smb_mapjoin_13.test_table1'
-'      Truncated Path -> Alias:'
-'        /smb_mapjoin_13.db/test_table1 [a]'
-'      Needs Tagging: false'
-'      Reduce Operator Tree:'
-'        Select Operator'
-'          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string)'
-'          outputColumnNames: _col0, _col1, _col2, _col3'
-'          Limit'
-'            Number of rows: 10'
-'            File Output Operator'
-'              compressed: false'
-'              GlobalTableId: 0'
-'              directory: file:/!!ELIDED!!
-'              NumFilesPerFileSink: 1'
-'              Stats Publishing Key Prefix: file:/!!ELIDED!!
-'              table:'
-'                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
-'                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
-'                  properties:'
-'                    columns _col0,_col1,_col2,_col3'
-'                    columns.types int:string:int:string'
-'                    escape.delim \'
-'                    hive.serialization.extend.additional.nesting.levels true'
-'                    serialization.escape.crlf true'
-'                    serialization.format 1'
-'                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-'                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-'              TotalFiles: 1'
-'              GatherStats: false'
-'              MultiFileSpray: false'
-''
-'  Stage: Stage-0'
-'    Fetch Operator'
-'      limit: 10'
-'      Processor Tree:'
-'        ListSink'
-''
-131 rows selected
->>>
->>> SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10;
-INFO : Compiling commandqueryId=(!!{queryId}!!): SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
-INFO : Semantic Analysis Completed
-INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:a.key, type:int, comment:null), FieldSchema(name:a.value, type:string, comment:null), FieldSchema(name:b.value, type:int, comment:null), FieldSchema(name:b.key, type:string, comment:null)], properties:null)
-INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : Executing commandqueryId=(!!{queryId}!!): SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
-INFO : PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
-INFO : PREHOOK: type: QUERY
-INFO : PREHOOK: Input: smb_mapjoin_13@test_table1
-INFO : PREHOOK: Input: smb_mapjoin_13@test_table2
-INFO : PREHOOK: Output: file:/!!ELIDED!!
-WARN : Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
-INFO : Query ID = !!{queryId}!!
-INFO : Total jobs = 1
-INFO : Launching Job 1 out of 1
-INFO : Starting task [Stage-1:MAPRED] in serial mode
-INFO : Number of reduce tasks determined at compile time: 1
-INFO : In order to change the average load for a reducer (in bytes):
-INFO : set hive.exec.reducers.bytes.per.reducer=<number>
-INFO : In order to limit the maximum number of reducers:
-INFO : set hive.exec.reducers.max=<number>
-INFO : In order to set a constant number of reducers:
-INFO : set mapreduce.job.reduces=<number>
-DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir
-DEBUG : adding the following namenodes' delegation tokens:[file:///]
-DEBUG : Creating splits at file:/!!ELIDED!!
-INFO : number of splits:16
-INFO : Submitting tokens for job: !!{jobId}}!!
-INFO : The url to track the job: http://localhost:8080/
-INFO : Job running in-process (local Hadoop)
-INFO : Ended Job = !!{jobId}!!
-INFO : POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
-INFO : POSTHOOK: type: QUERY
-INFO : POSTHOOK: Input: smb_mapjoin_13@test_table1
-INFO : POSTHOOK: Input: smb_mapjoin_13@test_table2
-INFO : POSTHOOK: Output: file:/!!ELIDED!!
-INFO : MapReduce Jobs Launched:
-INFO : Stage-Stage-1: HDFS Read: 0 HDFS Write: 0 SUCCESS
-INFO : Total MapReduce CPU Time Spent: 0 msec
-INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : OK
-DEBUG : Shutting down query SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
-'a.key','a.value','b.value','b.key'
-'0','val_0','0','val_0'
-'0','val_0','0','val_0'
-'0','val_0','0','val_0'
-'0','val_0','0','val_0'
-'0','val_0','0','val_0'
-'0','val_0','0','val_0'
-'0','val_0','0','val_0'
-'0','val_0','0','val_0'
-'0','val_0','0','val_0'
-'2','val_2','2','val_2'
-10 rows selected
->>>
->>> -- Join data from 2 tables on their respective columns (two each, with the same names but sorted
->>> -- with different priorities) and verify sort merge join is not used
->>> EXPLAIN EXTENDED
-SELECT /*+ MAPJOIN(b) */ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10;
-INFO : Compiling commandqueryId=(!!{queryId}!!): EXPLAIN EXTENDED
-SELECT /*+ MAPJOIN(b) */ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
-INFO : Semantic Analysis Completed
-INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:Explain, type:string, comment:null)], properties:null)
-INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : Executing commandqueryId=(!!{queryId}!!): EXPLAIN EXTENDED
-SELECT /*+ MAPJOIN(b) */ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
-INFO : PREHOOK: query: EXPLAIN EXTENDED
-SELECT /*+ MAPJOIN(b) */ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
-INFO : PREHOOK: type: QUERY
-INFO : Starting task [Stage-4:EXPLAIN] in serial mode
-INFO : POSTHOOK: query: EXPLAIN EXTENDED
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Sorted Merge Bucket Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 key (type: int)
+                  1 value (type: int)
+                outputColumnNames: _col0, _col1, _col5, _col6
+                Position of Big Table: 0
+                BucketMapJoin: true
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    null sort order: a
+                    sort order: +
+                    tag: -1
+                    TopN: 10
+                    TopN Hash Memory Usage: 0.1
+                    value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string)
+                    auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: test_table1
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              SORTBUCKETCOLSPREFIX TRUE
+              bucket_count 16
+              bucket_field_name key
+              column.name.delimiter ,
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.test_table1
+              numFiles 16
+              numRows 500
+              rawDataSize 5312
+              serialization.ddl struct test_table1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                SORTBUCKETCOLSPREFIX TRUE
+                bucket_count 16
+                bucket_field_name key
+                column.name.delimiter ,
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.test_table1
+                numFiles 16
+                numRows 500
+                rawDataSize 5312
+                serialization.ddl struct test_table1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 5812
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            name: default.test_table1
+            name: default.test_table1
+      Truncated Path -> Alias:
+        /test_table1 [a]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Limit
+            Number of rows: 10
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3
+                    columns.types int:string:int:string
+                    escape.delim \
+                    hive.serialization.extend.additional.nesting.levels true
+                    serialization.escape.crlf true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+0 val_0 0 val_0
+0 val_0 0 val_0
+0 val_0 0 val_0
+0 val_0 0 val_0
+0 val_0 0 val_0
+0 val_0 0 val_0
+0 val_0 0 val_0
+0 val_0 0 val_0
+0 val_0 0 val_0
+2 val_2 2 val_2
+PREHOOK: query: EXPLAIN EXTENDED
 SELECT /*+ MAPJOIN(b) */ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
-INFO : POSTHOOK: type: QUERY
-INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : OK
-DEBUG : Shutting down query EXPLAIN EXTENDED
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
 SELECT /*+ MAPJOIN(b) */ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
-'Explain'
-'STAGE DEPENDENCIES:'
-'  Stage-3 is a root stage'
-'  Stage-1 depends on stages: Stage-3'
-'  Stage-0 depends on stages: Stage-1'
-''
-'STAGE PLANS:'
-'  Stage: Stage-3'
-'    Map Reduce Local Work'
-'      Alias -> Map Local Tables:'
-'        b '
-'          Fetch Operator'
-'            limit: -1'
-'      Alias -> Map Local Operator Tree:'
-'        b '
-'          TableScan'
-'            alias: b'
-'            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE'
-'            GatherStats: false'
-'            Filter Operator'
-'              isSamplingPred: false'
-'              predicate: UDFToDouble(value) is not null (type: boolean)'
-'              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE'
-'              HashTable Sink Operator'
-'                keys:'
-'                  0 UDFToDouble(key) (type: double)'
-'                  1 UDFToDouble(value) (type: double)'
-'                Position of Big Table: 0'
-''
-'  Stage: Stage-1'
-'    Map Reduce'
-'      Map Operator Tree:'
-'          TableScan'
-'            alias: a'
-'            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE'
-'            GatherStats: false'
-'            Filter Operator'
-'              isSamplingPred: false'
-'              predicate: UDFToDouble(key) is not null (type: boolean)'
-'              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE'
-'              Map Join Operator'
-'                condition map:'
-'                     Inner Join 0 to 1'
-'                keys:'
-'                  0 UDFToDouble(key) (type: double)'
-'                  1 UDFToDouble(value) (type: double)'
-'                outputColumnNames: _col0, _col1, _col5, _col6'
-'                Position of Big Table: 0'
-'                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE'
-'                Select Operator'
-'                  expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)'
-'                  outputColumnNames: _col0, _col1, _col2, _col3'
-'                  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE'
-'                  Reduce Output Operator'
-'                    key expressions: _col0 (type: int)'
-'                    null sort order: a'
-'                    sort order: +'
-'                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE'
-'                    tag: -1'
-'                    TopN: 10'
-'                    TopN Hash Memory Usage: 0.1'
-'                    value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string)'
-'                    auto parallelism: false'
-'      Local Work:'
-'        Map Reduce Local Work'
-'      Path -> Alias:'
-'        file:/!!ELIDED!! [a]'
-'      Path -> Partition:'
-'        file:/!!ELIDED!! '
-'          Partition'
-'            base file name: test_table3'
-'            input format: org.apache.hadoop.mapred.TextInputFormat'
-'            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-'            properties:'
-'              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}'
-'              SORTBUCKETCOLSPREFIX TRUE'
-'              bucket_count 16'
-'              bucket_field_name key'
-'              column.name.delimiter ,'
-'              columns key,value'
-'              columns.comments '
-'              columns.types int:string'
-'              file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-'              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-'              location file:/!!ELIDED!!
-'              name smb_mapjoin_13.test_table3'
-'              numFiles 16'
-'              numRows 500'
-'              rawDataSize 5312'
-'              serialization.ddl struct test_table3 { i32 key, string value}'
-'              serialization.format 1'
-'              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-'              totalSize 5812'
-'              transient_lastDdlTime !!UNIXTIME!!'
-'            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-'          '
-'              input format: org.apache.hadoop.mapred.TextInputFormat'
-'              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-'              properties:'
-'                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}'
-'                SORTBUCKETCOLSPREFIX TRUE'
-'                bucket_count 16'
-'                bucket_field_name key'
-'                column.name.delimiter ,'
-'                columns key,value'
-'                columns.comments '
-'                columns.types int:string'
-'                file.inputformat org.apache.hadoop.mapred.TextInputFormat'
-'                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-'                location file:/!!ELIDED!!
-'                name smb_mapjoin_13.test_table3'
-'                numFiles 16'
-'                numRows 500'
-'                rawDataSize 5312'
-'                serialization.ddl struct test_table3 { i32 key, string value}'
-'                serialization.format 1'
-'                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-'                totalSize 5812'
-'                transient_lastDdlTime !!UNIXTIME!!'
-'              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-'            name: smb_mapjoin_13.test_table3'
-'            name: smb_mapjoin_13.test_table3'
-'      Truncated Path -> Alias:'
-'        /smb_mapjoin_13.db/test_table3 [a]'
-'      Needs Tagging: false'
-'      Reduce Operator Tree:'
-'        Select Operator'
-'          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string)'
-'          outputColumnNames: _col0, _col1, _col2, _col3'
-'          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE'
-'          Limit'
-'            Number of rows: 10'
-'            Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE'
-'            File Output Operator'
-'              compressed: false'
-'              GlobalTableId: 0'
-'              directory: file:/!!ELIDED!!
-'              NumFilesPerFileSink: 1'
-'              Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE'
-'              Stats Publishing Key Prefix: file:/!!ELIDED!!
-'              table:'
-'                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
-'                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
-'                  properties:'
-'                    columns _col0,_col1,_col2,_col3'
-'                    columns.types int:string:int:string'
-'                    escape.delim \'
-'                    hive.serialization.extend.additional.nesting.levels true'
-'                    serialization.escape.crlf true'
-'                    serialization.format 1'
-'                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-'                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-'              TotalFiles: 1'
-'              GatherStats: false'
-'              MultiFileSpray: false'
-''
-'  Stage: Stage-0'
-'    Fetch Operator'
-'      limit: 10'
-'      Processor Tree:'
-'        ListSink'
-''
-161 rows selected
->>>
->>> SELECT /*+ MAPJOIN(b) */ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10;
-INFO : Compiling commandqueryId=(!!{queryId}!!): SELECT /*+ MAPJOIN(b) */ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
-INFO : Semantic Analysis Completed
-INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:a.key, type:int, comment:null), FieldSchema(name:a.value, type:string, comment:null), FieldSchema(name:b.key, type:int, comment:null), FieldSchema(name:b.value, type:string, comment:null)], properties:null)
-INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : Executing commandqueryId=(!!{queryId}!!): SELECT /*+ MAPJOIN(b) */ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
-INFO : PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
-INFO : PREHOOK: type: QUERY
-INFO : PREHOOK: Input: smb_mapjoin_13@test_table3
-INFO : PREHOOK: Input: smb_mapjoin_13@test_table4
-INFO : PREHOOK: Output: file:/!!ELIDED!!
-WARN : Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
-INFO : Query ID = !!{queryId}!!
-INFO : Total jobs = 1
-INFO : Starting task [Stage-3:MAPREDLOCAL] in serial mode
-INFO : Starting to launch local task to process map join; maximum memory = !!ELIDED!!
-INFO : End of local task; Time taken: !!ELIDED!! sec.
-INFO : Launching Job 1 out of 1
-INFO : Starting task [Stage-1:MAPRED] in serial mode
-INFO : Number of reduce tasks determined at compile time: 1
-INFO : In order to change the average load for a reducer (in bytes):
-INFO : set hive.exec.reducers.bytes.per.reducer=<number>
-INFO : In order to limit the maximum number of reducers:
-INFO : set hive.exec.reducers.max=<number>
-INFO : In order to set a constant number of reducers:
-INFO : set mapreduce.job.reduces=<number>
-DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir
-DEBUG : adding the following namenodes' delegation tokens:[file:///]
-DEBUG : Creating splits at file:/!!ELIDED!!
-INFO : number of splits:16
-INFO : Submitting tokens for job: !!{jobId}}!!
-INFO : The url to track the job: http://localhost:8080/
-INFO : Job running in-process (local Hadoop)
-INFO : Ended Job = !!{jobId}!!
-INFO : POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
-INFO : POSTHOOK: type: QUERY
-INFO : POSTHOOK: Input: smb_mapjoin_13@test_table3
-INFO : POSTHOOK: Input: smb_mapjoin_13@test_table4
-INFO : POSTHOOK: Output: file:/!!ELIDED!!
-INFO : MapReduce Jobs Launched:
-INFO : Stage-Stage-1: HDFS Read: 0 HDFS Write: 0 SUCCESS
-INFO : Total MapReduce CPU Time Spent: 0 msec
-INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : OK
-DEBUG : Shutting down query SELECT /*+ MAPJOIN(b) */ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
-'a.key','a.value','b.key','b.value'
-No rows selected
->>> !record
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-1 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-3
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        b 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: UDFToDouble(value) is not null (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                keys:
+                  0 UDFToDouble(key) (type: double)
+                  1 UDFToDouble(value) (type: double)
+                Position of Big Table: 0
+
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: UDFToDouble(key) is not null (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 UDFToDouble(key) (type: double)
+                  1 UDFToDouble(value) (type: double)
+                outputColumnNames: _col0, _col1, _col5, _col6
+                Position of Big Table: 0
+                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    null sort order: a
+                    sort order: +
+                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                    tag: -1
+                    TopN: 10
+                    TopN Hash Memory Usage: 0.1
+                    value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string)
+                    auto parallelism: false
+      Local Work:
+        Map Reduce Local Work
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: test_table3
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              SORTBUCKETCOLSPREFIX TRUE
+              bucket_count 16
+              bucket_field_name key
+              column.name.delimiter ,
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.test_table3
+              numFiles 16
+              numRows 500
+              rawDataSize 5312
+              serialization.ddl struct test_table3 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                SORTBUCKETCOLSPREFIX TRUE
+                bucket_count 16
+                bucket_field_name key
+                column.name.delimiter ,
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.test_table3
+                numFiles 16
+                numRows 500
+                rawDataSize 5312
+                serialization.ddl struct test_table3 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 5812
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            name: default.test_table3
+            name: default.test_table3
+      Truncated Path -> Alias:
+        /test_table3 [a]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3
+                    columns.types int:string:int:string
+                    escape.delim \
+                    hive.serialization.extend.additional.nesting.levels true
+                    serialization.escape.crlf true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table4
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table4
+#### A masked pattern was here ####
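
For readers skimming the diff above: stripped of the driver logs, smb_mapjoin_13.q exercises the following HiveQL (statements copied from the output; the 16-bucket layout and table names are the test's own choices, not requirements of SMB joins):

    set hive.optimize.bucketmapjoin = true;
    set hive.optimize.bucketmapjoin.sortedmerge = true;
    set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;

    -- Both sides are bucketed and sorted on their join column, which is
    -- named differently on each side (test_table1.key vs test_table2.value).
    CREATE TABLE test_table1 (key INT, value STRING)
      CLUSTERED BY (key) SORTED BY (key ASC) INTO 16 BUCKETS;
    CREATE TABLE test_table2 (value INT, key STRING)
      CLUSTERED BY (value) SORTED BY (value ASC) INTO 16 BUCKETS;

    -- INT = INT join on the sorted columns: the first plan shows a
    -- "Sorted Merge Bucket Map Join Operator", i.e. the SMB join is used.
    SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value;

The negative case reads the same way: test_table3 and test_table4 sort on (key, value) and (value, key) respectively, and joining a.key (INT) to b.value (STRING) forces UDFToDouble on both keys, so the second plan falls back to a plain map join (Stage-3 local HashTable Sink plus a Map Join Operator) instead of an SMB join.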
http://git-wip-us.apache.org/repos/asf/hive/blob/2509e2fa/ql/src/test/results/clientpositive/beeline/smb_mapjoin_16.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_16.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_16.q.out index b8a06dc..ab2b323 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_16.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_16.q.out @@ -1,254 +1,96 @@ ->>> set hive.optimize.bucketmapjoin = true; -No rows affected ->>> set hive.optimize.bucketmapjoin.sortedmerge = true; -No rows affected ->>> set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; -No rows affected ->>> set hive.cbo.enable=false; -No rows affected ->>> ->>> set hive.exec.reducers.max = 1; -No rows affected ->>> set hive.merge.mapfiles=false; -No rows affected ->>> set hive.merge.mapredfiles=false; -No rows affected ->>> ->>> -- Create bucketed and sorted tables ->>> CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; -INFO : Compiling commandqueryId=(!!{queryId}!!): CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -INFO : Semantic Analysis Completed -INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null) -INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds -INFO : Executing commandqueryId=(!!{queryId}!!): CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -INFO : PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -INFO : PREHOOK: type: CREATETABLE -INFO : PREHOOK: Output: database:smb_mapjoin_16 -INFO : PREHOOK: Output: smb_mapjoin_16@test_table1 -INFO : Starting task [Stage-0:DDL] in serial mode -INFO : POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -INFO : POSTHOOK: type: CREATETABLE -INFO : POSTHOOK: Output: database:smb_mapjoin_16 -INFO : POSTHOOK: Output: smb_mapjoin_16@test_table1 -INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds -INFO : OK -DEBUG : Shutting down query CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -No rows affected ->>> CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; -INFO : Compiling commandqueryId=(!!{queryId}!!): CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -INFO : Semantic Analysis Completed -INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null) -INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! 
seconds -INFO : Executing commandqueryId=(!!{queryId}!!): CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -INFO : PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -INFO : PREHOOK: type: CREATETABLE -INFO : PREHOOK: Output: database:smb_mapjoin_16 -INFO : PREHOOK: Output: smb_mapjoin_16@test_table2 -INFO : Starting task [Stage-0:DDL] in serial mode -INFO : POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -INFO : POSTHOOK: type: CREATETABLE -INFO : POSTHOOK: Output: database:smb_mapjoin_16 -INFO : POSTHOOK: Output: smb_mapjoin_16@test_table2 -INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds -INFO : OK -DEBUG : Shutting down query CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -No rows affected ->>> ->>> FROM default.src -INSERT OVERWRITE TABLE test_table1 SELECT * -INSERT OVERWRITE TABLE test_table2 SELECT *; -INFO : Compiling commandqueryId=(!!{queryId}!!): FROM default.src -INSERT OVERWRITE TABLE test_table1 SELECT * -INSERT OVERWRITE TABLE test_table2 SELECT * -INFO : Semantic Analysis Completed -INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:_col0, type:int, comment:null), FieldSchema(name:_col1, type:string, comment:null)], properties:null) -INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds -INFO : Executing commandqueryId=(!!{queryId}!!): FROM default.src -INSERT OVERWRITE TABLE test_table1 SELECT * -INSERT OVERWRITE TABLE test_table2 SELECT * -INFO : PREHOOK: query: FROM default.src +PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table1 +POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src INSERT OVERWRITE TABLE test_table1 SELECT * INSERT OVERWRITE TABLE test_table2 SELECT * -INFO : PREHOOK: type: QUERY -INFO : PREHOOK: Input: default@src -INFO : PREHOOK: Output: smb_mapjoin_16@test_table1 -INFO : PREHOOK: Output: smb_mapjoin_16@test_table2 -WARN : Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. -INFO : Query ID = !!{queryId}!! 
-INFO : Total jobs = 2 -INFO : Launching Job 1 out of 2 -INFO : Starting task [Stage-2:MAPRED] in serial mode -INFO : Number of reduce tasks determined at compile time: 1 -INFO : In order to change the average load for a reducer (in bytes): -INFO : set hive.exec.reducers.bytes.per.reducer=<number> -INFO : In order to limit the maximum number of reducers: -INFO : set hive.exec.reducers.max=<number> -INFO : In order to set a constant number of reducers: -INFO : set mapreduce.job.reduces=<number> -DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir -DEBUG : adding the following namenodes' delegation tokens:[file:///] -DEBUG : Creating splits at file:/!!ELIDED!! -INFO : number of splits:1 -INFO : Submitting tokens for job: !!{jobId}}!! -INFO : The url to track the job: http://localhost:8080/ -INFO : Job running in-process (local Hadoop) -INFO : Ended Job = !!{jobId}!! -INFO : Starting task [Stage-0:MOVE] in serial mode -INFO : Loading data to table smb_mapjoin_16.test_table1 from file:/!!ELIDED!! -INFO : Launching Job 2 out of 2 -INFO : Starting task [Stage-4:MAPRED] in serial mode -INFO : Number of reduce tasks determined at compile time: 1 -INFO : In order to change the average load for a reducer (in bytes): -INFO : set hive.exec.reducers.bytes.per.reducer=<number> -INFO : In order to limit the maximum number of reducers: -INFO : set hive.exec.reducers.max=<number> -INFO : In order to set a constant number of reducers: -INFO : set mapreduce.job.reduces=<number> -DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir -DEBUG : adding the following namenodes' delegation tokens:[file:///] -DEBUG : Creating splits at file:/!!ELIDED!! -INFO : number of splits:1 -INFO : Submitting tokens for job: !!{jobId}}!! -INFO : The url to track the job: http://localhost:8080/ -INFO : Job running in-process (local Hadoop) -INFO : Ended Job = !!{jobId}!! -INFO : Starting task [Stage-3:STATS] in serial mode -INFO : Starting task [Stage-1:MOVE] in serial mode -INFO : Loading data to table smb_mapjoin_16.test_table2 from file:/!!ELIDED!! -INFO : Starting task [Stage-5:STATS] in serial mode -INFO : POSTHOOK: query: FROM default.src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE test_table1 SELECT * INSERT OVERWRITE TABLE test_table2 SELECT * -INFO : POSTHOOK: type: QUERY -INFO : POSTHOOK: Input: default@src -INFO : POSTHOOK: Output: smb_mapjoin_16@test_table1 -INFO : POSTHOOK: Output: smb_mapjoin_16@test_table2 -INFO : POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -INFO : POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -INFO : POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -INFO : POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -INFO : MapReduce Jobs Launched: -INFO : Stage-Stage-2: HDFS Read: 0 HDFS Write: 0 SUCCESS -INFO : Stage-Stage-4: HDFS Read: 0 HDFS Write: 0 SUCCESS -INFO : Total MapReduce CPU Time Spent: 0 msec -INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! 
-INFO : OK
-DEBUG : Shutting down query FROM default.src
-INSERT OVERWRITE TABLE test_table1 SELECT *
-INSERT OVERWRITE TABLE test_table2 SELECT *
-No rows affected
->>>
->>> -- Mapjoin followed by a aggregation should be performed in a single MR job
->>> EXPLAIN
-SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key;
-INFO : Compiling commandqueryId=(!!{queryId}!!): EXPLAIN
-SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
-INFO : Semantic Analysis Completed
-INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:Explain, type:string, comment:null)], properties:null)
-INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : Executing commandqueryId=(!!{queryId}!!): EXPLAIN
-SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
-INFO : PREHOOK: query: EXPLAIN
-SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
-INFO : PREHOOK: type: QUERY
-INFO : Starting task [Stage-3:EXPLAIN] in serial mode
-INFO : POSTHOOK: query: EXPLAIN
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: EXPLAIN
 SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
-INFO : POSTHOOK: type: QUERY
-INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : OK
-DEBUG : Shutting down query EXPLAIN
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
 SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
-'Explain'
-'STAGE DEPENDENCIES:'
-'  Stage-1 is a root stage'
-'  Stage-0 depends on stages: Stage-1'
-''
-'STAGE PLANS:'
-'  Stage: Stage-1'
-'    Map Reduce'
-'      Map Operator Tree:'
-'          TableScan'
-'            alias: a'
-'            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE'
-'            Filter Operator'
-'              predicate: key is not null (type: boolean)'
-'              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE'
-'              Sorted Merge Bucket Map Join Operator'
-'                condition map:'
-'                     Inner Join 0 to 1'
-'                keys:'
-'                  0 key (type: int)'
-'                  1 key (type: int)'
-'                Group By Operator'
-'                  aggregations: count()'
-'                  mode: hash'
-'                  outputColumnNames: _col0'
-'                  Reduce Output Operator'
-'                    sort order: '
-'                    value expressions: _col0 (type: bigint)'
-'      Reduce Operator Tree:'
-'        Group By Operator'
-'          aggregations: count(VALUE._col0)'
-'          mode: mergepartial'
-'          outputColumnNames: _col0'
-'          File Output Operator'
-'            compressed: false'
-'            table:'
-'                input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
-'                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
-'                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-''
-'  Stage: Stage-0'
-'    Fetch Operator'
-'      limit: -1'
-'      Processor Tree:'
-'        ListSink'
-''
-45 rows selected
->>> SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key;
-INFO : Compiling commandqueryId=(!!{queryId}!!): SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
-INFO : Semantic Analysis Completed
-INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:_c1, type:bigint, comment:null)], properties:null)
-INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : Executing commandqueryId=(!!{queryId}!!): SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
-INFO : PREHOOK: query: SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
-INFO : PREHOOK: type: QUERY
-INFO : PREHOOK: Input: smb_mapjoin_16@test_table1
-INFO : PREHOOK: Input: smb_mapjoin_16@test_table2
-INFO : PREHOOK: Output: file:/!!ELIDED!!
-WARN : Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
-INFO : Query ID = !!{queryId}!!
-INFO : Total jobs = 1
-INFO : Launching Job 1 out of 1
-INFO : Starting task [Stage-1:MAPRED] in serial mode
-INFO : Number of reduce tasks determined at compile time: 1
-INFO : In order to change the average load for a reducer (in bytes):
-INFO : set hive.exec.reducers.bytes.per.reducer=<number>
-INFO : In order to limit the maximum number of reducers:
-INFO : set hive.exec.reducers.max=<number>
-INFO : In order to set a constant number of reducers:
-INFO : set mapreduce.job.reduces=<number>
-DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir
-DEBUG : adding the following namenodes' delegation tokens:[file:///]
-DEBUG : Creating splits at file:/!!ELIDED!!
-INFO : number of splits:2
-INFO : Submitting tokens for job: !!{jobId}}!!
-INFO : The url to track the job: http://localhost:8080/
-INFO : Job running in-process (local Hadoop)
-INFO : Ended Job = !!{jobId}!!
-INFO : POSTHOOK: query: SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
-INFO : POSTHOOK: type: QUERY
-INFO : POSTHOOK: Input: smb_mapjoin_16@test_table1
-INFO : POSTHOOK: Input: smb_mapjoin_16@test_table2
-INFO : POSTHOOK: Output: file:/!!ELIDED!!
-INFO : MapReduce Jobs Launched:
-INFO : Stage-Stage-1: HDFS Read: 0 HDFS Write: 0 SUCCESS
-INFO : Total MapReduce CPU Time Spent: 0 msec
-INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds
-INFO : OK
-DEBUG : Shutting down query SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
-'_c1'
-'1028'
-1 row selected
->>> !record
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Sorted Merge Bucket Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+1028
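
For review context, a minimal HiveQL sketch of the scenario this golden file encodes. The DDL and queries are taken verbatim from the output above; the two "set" commands are an assumption on my part (they are the standard flags that enable sorted-merge bucket map joins, but the actual session settings of the test driver are not visible in this hunk):

    -- Assumed session settings (standard SMB-join flags; not shown in this hunk):
    set hive.optimize.bucketmapjoin = true;
    set hive.optimize.bucketmapjoin.sortedmerge = true;

    -- Both tables are bucketed and sorted on the join key into the same number
    -- of buckets; matching bucket counts and sort order are what make the
    -- sorted-merge bucket map join legal.
    CREATE TABLE test_table1 (key INT, value STRING)
      CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
    CREATE TABLE test_table2 (key INT, value STRING)
      CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;

    -- Multi-insert: a single scan of src populates both bucketed tables.
    FROM src
    INSERT OVERWRITE TABLE test_table1 SELECT *
    INSERT OVERWRITE TABLE test_table2 SELECT *;

    -- The join appears as 'Sorted Merge Bucket Map Join Operator' inside the
    -- mapper's operator tree, so the join plus the count(*) aggregation
    -- compile into the single map-reduce stage shown in the plan above.
    SELECT /*+mapjoin(b)*/ count(*)
    FROM test_table1 a JOIN test_table2 b ON a.key = b.key;   -- returns 1028

Note that the plan itself is unchanged by this patch: both the old beeline-style output and the new format show one Stage-1 map-reduce stage with the SMB join in the map phase, and both report the same result (1028), so the diff only reflects the golden-file output format migration.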