http://git-wip-us.apache.org/repos/asf/hive/blob/a2ce7f3d/ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out new file mode 100644 index 0000000..70a37ca --- /dev/null +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out @@ -0,0 +1,948 @@ +>>> set hive.strict.checks.bucketing=false; +No rows affected +>>> +>>> +>>> +>>> +>>> +>>> create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; +INFO : Compiling commandqueryId=(!!{queryId}!!): create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +INFO : PREHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +INFO : PREHOOK: type: CREATETABLE +INFO : PREHOOK: Output: database:smb_mapjoin_1 +INFO : PREHOOK: Output: smb_mapjoin_1@smb_bucket_1 +INFO : Starting task [Stage-0:DDL] in serial mode +INFO : POSTHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +INFO : POSTHOOK: type: CREATETABLE +INFO : POSTHOOK: Output: database:smb_mapjoin_1 +INFO : POSTHOOK: Output: smb_mapjoin_1@smb_bucket_1 +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +No rows affected +>>> create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; +INFO : Compiling commandqueryId=(!!{queryId}!!): create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +INFO : PREHOOK: query: create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +INFO : PREHOOK: type: CREATETABLE +INFO : PREHOOK: Output: database:smb_mapjoin_1 +INFO : PREHOOK: Output: smb_mapjoin_1@smb_bucket_2 +INFO : Starting task [Stage-0:DDL] in serial mode +INFO : POSTHOOK: query: create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +INFO : POSTHOOK: type: CREATETABLE +INFO : POSTHOOK: Output: database:smb_mapjoin_1 +INFO : POSTHOOK: Output: smb_mapjoin_1@smb_bucket_2 +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +No rows affected +>>> create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE; +INFO : Compiling commandqueryId=(!!{queryId}!!): create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +INFO : PREHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +INFO : PREHOOK: type: CREATETABLE +INFO : PREHOOK: Output: database:smb_mapjoin_1 +INFO : PREHOOK: Output: smb_mapjoin_1@smb_bucket_3 +INFO : Starting task [Stage-0:DDL] in serial mode +INFO : POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +INFO : POSTHOOK: type: CREATETABLE +INFO : POSTHOOK: Output: database:smb_mapjoin_1 +INFO : POSTHOOK: Output: smb_mapjoin_1@smb_bucket_3 +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +No rows affected +>>> +>>> load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1; +INFO : Compiling commandqueryId=(!!{queryId}!!): load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +INFO : PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +INFO : PREHOOK: type: LOAD +INFO : PREHOOK: Input: file:/!!ELIDED!! +INFO : PREHOOK: Output: smb_mapjoin_1@smb_bucket_1 +INFO : Starting task [Stage-0:MOVE] in serial mode +INFO : Loading data to table smb_mapjoin_1.smb_bucket_1 from file:/!!ELIDED!! +INFO : Starting task [Stage-1:STATS] in serial mode +INFO : POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +INFO : POSTHOOK: type: LOAD +INFO : POSTHOOK: Input: file:/!!ELIDED!! +INFO : POSTHOOK: Output: smb_mapjoin_1@smb_bucket_1 +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +No rows affected +>>> load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2; +INFO : Compiling commandqueryId=(!!{queryId}!!): load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +INFO : PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +INFO : PREHOOK: type: LOAD +INFO : PREHOOK: Input: file:/!!ELIDED!! +INFO : PREHOOK: Output: smb_mapjoin_1@smb_bucket_2 +INFO : Starting task [Stage-0:MOVE] in serial mode +INFO : Loading data to table smb_mapjoin_1.smb_bucket_2 from file:/!!ELIDED!! +INFO : Starting task [Stage-1:STATS] in serial mode +INFO : POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +INFO : POSTHOOK: type: LOAD +INFO : POSTHOOK: Input: file:/!!ELIDED!! +INFO : POSTHOOK: Output: smb_mapjoin_1@smb_bucket_2 +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +No rows affected +>>> load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3; +INFO : Compiling commandqueryId=(!!{queryId}!!): load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +INFO : PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +INFO : PREHOOK: type: LOAD +INFO : PREHOOK: Input: file:/!!ELIDED!! +INFO : PREHOOK: Output: smb_mapjoin_1@smb_bucket_3 +INFO : Starting task [Stage-0:MOVE] in serial mode +INFO : Loading data to table smb_mapjoin_1.smb_bucket_3 from file:/!!ELIDED!! +INFO : Starting task [Stage-1:STATS] in serial mode +INFO : POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +INFO : POSTHOOK: type: LOAD +INFO : POSTHOOK: Input: file:/!!ELIDED!! +INFO : POSTHOOK: Output: smb_mapjoin_1@smb_bucket_3 +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +No rows affected +>>> +>>> set hive.cbo.enable=false; +No rows affected +>>> set hive.optimize.bucketmapjoin = true; +No rows affected +>>> set hive.optimize.bucketmapjoin.sortedmerge = true; +No rows affected +>>> set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +No rows affected +>>> +>>> -- SORT_QUERY_RESULTS +>>> +>>> explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key; +INFO : Compiling commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:Explain, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: type: QUERY +INFO : Starting task [Stage-3:EXPLAIN] in serial mode +INFO : POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +INFO : POSTHOOK: type: QUERY +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +'Explain' +'STAGE DEPENDENCIES:' +' Stage-1 is a root stage' +' Stage-0 depends on stages: Stage-1' +'' +'STAGE PLANS:' +' Stage: Stage-1' +' Map Reduce' +' Map Operator Tree:' +' TableScan' +' alias: b' +' Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE' +' Filter Operator' +' predicate: key is not null (type: boolean)' +' Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE' +' Sorted Merge Bucket Map Join Operator' +' condition map:' +' Inner Join 0 to 1' +' keys:' +' 0 key (type: int)' +' 1 key (type: int)' +' outputColumnNames: _col0, _col1, _col5, _col6' +' Select Operator' +' expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)' +' outputColumnNames: _col0, _col1, _col2, _col3' +' File Output Operator' +' compressed: false' +' table:' +' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' +' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' +' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +'' +' Stage: Stage-0' +' Fetch Operator' +' limit: -1' +' Processor Tree:' +' ListSink' +'' +37 rows selected +>>> select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key; +INFO : Compiling commandqueryId=(!!{queryId}!!): select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:a.key, type:int, comment:null), FieldSchema(name:a.value, type:string, comment:null), FieldSchema(name:b.key, type:int, comment:null), FieldSchema(name:b.value, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: type: QUERY +INFO : PREHOOK: Input: smb_mapjoin_1@smb_bucket_1 +INFO : PREHOOK: Input: smb_mapjoin_1@smb_bucket_2 +INFO : PREHOOK: Output: file:/!!ELIDED!! +WARN : Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. +INFO : Query ID = !!{queryId}!! +INFO : Total jobs = 1 +INFO : Launching Job 1 out of 1 +INFO : Starting task [Stage-1:MAPRED] in serial mode +INFO : Number of reduce tasks is set to 0 since there's no reduce operator +DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir +DEBUG : adding the following namenodes' delegation tokens:[file:///] +DEBUG : Creating splits at file:/!!ELIDED!! +INFO : number of splits:1 +INFO : Submitting tokens for job: !!{jobId}}!! +INFO : The url to track the job: http://localhost:8080/ +INFO : Job running in-process (local Hadoop) +INFO : Ended Job = !!{jobId}!! +INFO : POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +INFO : POSTHOOK: type: QUERY +INFO : POSTHOOK: Input: smb_mapjoin_1@smb_bucket_1 +INFO : POSTHOOK: Input: smb_mapjoin_1@smb_bucket_2 +INFO : POSTHOOK: Output: file:/!!ELIDED!! +INFO : MapReduce Jobs Launched: +INFO : Stage-Stage-1: HDFS Read: 0 HDFS Write: 0 SUCCESS +INFO : Total MapReduce CPU Time Spent: 0 msec +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +'a.key','a.value','b.key','b.value' +No rows selected +>>> +>>> explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key; +INFO : Compiling commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:Explain, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: type: QUERY +INFO : Starting task [Stage-3:EXPLAIN] in serial mode +INFO : POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +INFO : POSTHOOK: type: QUERY +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +'Explain' +'STAGE DEPENDENCIES:' +' Stage-1 is a root stage' +' Stage-0 depends on stages: Stage-1' +'' +'STAGE PLANS:' +' Stage: Stage-1' +' Map Reduce' +' Map Operator Tree:' +' TableScan' +' alias: b' +' Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE' +' Sorted Merge Bucket Map Join Operator' +' condition map:' +' Left Outer Join0 to 1' +' keys:' +' 0 key (type: int)' +' 1 key (type: int)' +' outputColumnNames: _col0, _col1, _col5, _col6' +' Select Operator' +' expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)' +' outputColumnNames: _col0, _col1, _col2, _col3' +' File Output Operator' +' compressed: false' +' table:' +' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' +' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' +' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +'' +' Stage: Stage-0' +' Fetch Operator' +' limit: -1' +' Processor Tree:' +' ListSink' +'' +34 rows selected +>>> select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key; +INFO : Compiling commandqueryId=(!!{queryId}!!): select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:a.key, type:int, comment:null), FieldSchema(name:a.value, type:string, comment:null), FieldSchema(name:b.key, type:int, comment:null), FieldSchema(name:b.value, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: type: QUERY +INFO : PREHOOK: Input: smb_mapjoin_1@smb_bucket_1 +INFO : PREHOOK: Input: smb_mapjoin_1@smb_bucket_2 +INFO : PREHOOK: Output: file:/!!ELIDED!! +WARN : Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. +INFO : Query ID = !!{queryId}!! +INFO : Total jobs = 1 +INFO : Launching Job 1 out of 1 +INFO : Starting task [Stage-1:MAPRED] in serial mode +INFO : Number of reduce tasks is set to 0 since there's no reduce operator +DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir +DEBUG : adding the following namenodes' delegation tokens:[file:///] +DEBUG : Creating splits at file:/!!ELIDED!! +INFO : number of splits:1 +INFO : Submitting tokens for job: !!{jobId}}!! +INFO : The url to track the job: http://localhost:8080/ +INFO : Job running in-process (local Hadoop) +INFO : Ended Job = !!{jobId}!! +INFO : POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +INFO : POSTHOOK: type: QUERY +INFO : POSTHOOK: Input: smb_mapjoin_1@smb_bucket_1 +INFO : POSTHOOK: Input: smb_mapjoin_1@smb_bucket_2 +INFO : POSTHOOK: Output: file:/!!ELIDED!! +INFO : MapReduce Jobs Launched: +INFO : Stage-Stage-1: HDFS Read: 0 HDFS Write: 0 SUCCESS +INFO : Total MapReduce CPU Time Spent: 0 msec +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +'a.key','a.value','b.key','b.value' +'1','val_1','NULL','NULL' +'3','val_3','NULL','NULL' +'4','val_4','NULL','NULL' +'5','val_5','NULL','NULL' +'10','val_10','NULL','NULL' +5 rows selected +>>> +>>> explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key; +INFO : Compiling commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:Explain, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: type: QUERY +INFO : Starting task [Stage-3:EXPLAIN] in serial mode +INFO : POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +INFO : POSTHOOK: type: QUERY +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +'Explain' +'STAGE DEPENDENCIES:' +' Stage-1 is a root stage' +' Stage-0 depends on stages: Stage-1' +'' +'STAGE PLANS:' +' Stage: Stage-1' +' Map Reduce' +' Map Operator Tree:' +' TableScan' +' alias: b' +' Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE' +' Sorted Merge Bucket Map Join Operator' +' condition map:' +' Right Outer Join0 to 1' +' keys:' +' 0 key (type: int)' +' 1 key (type: int)' +' outputColumnNames: _col0, _col1, _col5, _col6' +' Select Operator' +' expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)' +' outputColumnNames: _col0, _col1, _col2, _col3' +' File Output Operator' +' compressed: false' +' table:' +' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' +' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' +' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +'' +' Stage: Stage-0' +' Fetch Operator' +' limit: -1' +' Processor Tree:' +' ListSink' +'' +34 rows selected +>>> select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key; +INFO : Compiling commandqueryId=(!!{queryId}!!): select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:a.key, type:int, comment:null), FieldSchema(name:a.value, type:string, comment:null), FieldSchema(name:b.key, type:int, comment:null), FieldSchema(name:b.value, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: type: QUERY +INFO : PREHOOK: Input: smb_mapjoin_1@smb_bucket_1 +INFO : PREHOOK: Input: smb_mapjoin_1@smb_bucket_2 +INFO : PREHOOK: Output: file:/!!ELIDED!! +WARN : Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. +INFO : Query ID = !!{queryId}!! +INFO : Total jobs = 1 +INFO : Launching Job 1 out of 1 +INFO : Starting task [Stage-1:MAPRED] in serial mode +INFO : Number of reduce tasks is set to 0 since there's no reduce operator +DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir +DEBUG : adding the following namenodes' delegation tokens:[file:///] +DEBUG : Creating splits at file:/!!ELIDED!! +INFO : number of splits:1 +INFO : Submitting tokens for job: !!{jobId}}!! +INFO : The url to track the job: http://localhost:8080/ +INFO : Job running in-process (local Hadoop) +INFO : Ended Job = !!{jobId}!! +INFO : POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +INFO : POSTHOOK: type: QUERY +INFO : POSTHOOK: Input: smb_mapjoin_1@smb_bucket_1 +INFO : POSTHOOK: Input: smb_mapjoin_1@smb_bucket_2 +INFO : POSTHOOK: Output: file:/!!ELIDED!! +INFO : MapReduce Jobs Launched: +INFO : Stage-Stage-1: HDFS Read: 0 HDFS Write: 0 SUCCESS +INFO : Total MapReduce CPU Time Spent: 0 msec +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +'a.key','a.value','b.key','b.value' +'NULL','NULL','20','val_20' +'NULL','NULL','23','val_23' +'NULL','NULL','25','val_25' +'NULL','NULL','30','val_30' +4 rows selected +>>> +>>> explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key; +INFO : Compiling commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:Explain, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: type: QUERY +INFO : Starting task [Stage-3:EXPLAIN] in serial mode +INFO : POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +INFO : POSTHOOK: type: QUERY +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +'Explain' +'STAGE DEPENDENCIES:' +' Stage-1 is a root stage' +' Stage-0 depends on stages: Stage-1' +'' +'STAGE PLANS:' +' Stage: Stage-1' +' Map Reduce' +' Map Operator Tree:' +' TableScan' +' alias: b' +' Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE' +' Sorted Merge Bucket Map Join Operator' +' condition map:' +' Outer Join 0 to 1' +' keys:' +' 0 key (type: int)' +' 1 key (type: int)' +' outputColumnNames: _col0, _col1, _col5, _col6' +' Select Operator' +' expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)' +' outputColumnNames: _col0, _col1, _col2, _col3' +' File Output Operator' +' compressed: false' +' table:' +' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' +' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' +' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +'' +' Stage: Stage-0' +' Fetch Operator' +' limit: -1' +' Processor Tree:' +' ListSink' +'' +34 rows selected +>>> select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key; +INFO : Compiling commandqueryId=(!!{queryId}!!): select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:a.key, type:int, comment:null), FieldSchema(name:a.value, type:string, comment:null), FieldSchema(name:b.key, type:int, comment:null), FieldSchema(name:b.value, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: type: QUERY +INFO : PREHOOK: Input: smb_mapjoin_1@smb_bucket_1 +INFO : PREHOOK: Input: smb_mapjoin_1@smb_bucket_2 +INFO : PREHOOK: Output: file:/!!ELIDED!! +WARN : Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. +INFO : Query ID = !!{queryId}!! +INFO : Total jobs = 1 +INFO : Launching Job 1 out of 1 +INFO : Starting task [Stage-1:MAPRED] in serial mode +INFO : Number of reduce tasks is set to 0 since there's no reduce operator +DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir +DEBUG : adding the following namenodes' delegation tokens:[file:///] +DEBUG : Creating splits at file:/!!ELIDED!! +INFO : number of splits:1 +INFO : Submitting tokens for job: !!{jobId}}!! +INFO : The url to track the job: http://localhost:8080/ +INFO : Job running in-process (local Hadoop) +INFO : Ended Job = !!{jobId}!! +INFO : POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +INFO : POSTHOOK: type: QUERY +INFO : POSTHOOK: Input: smb_mapjoin_1@smb_bucket_1 +INFO : POSTHOOK: Input: smb_mapjoin_1@smb_bucket_2 +INFO : POSTHOOK: Output: file:/!!ELIDED!! +INFO : MapReduce Jobs Launched: +INFO : Stage-Stage-1: HDFS Read: 0 HDFS Write: 0 SUCCESS +INFO : Total MapReduce CPU Time Spent: 0 msec +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +'a.key','a.value','b.key','b.value' +'1','val_1','NULL','NULL' +'3','val_3','NULL','NULL' +'4','val_4','NULL','NULL' +'5','val_5','NULL','NULL' +'10','val_10','NULL','NULL' +'NULL','NULL','20','val_20' +'NULL','NULL','23','val_23' +'NULL','NULL','25','val_25' +'NULL','NULL','30','val_30' +9 rows selected +>>> +>>> +>>> explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key; +INFO : Compiling commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:Explain, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: type: QUERY +INFO : Starting task [Stage-3:EXPLAIN] in serial mode +INFO : POSTHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +INFO : POSTHOOK: type: QUERY +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +'Explain' +'STAGE DEPENDENCIES:' +' Stage-1 is a root stage' +' Stage-0 depends on stages: Stage-1' +'' +'STAGE PLANS:' +' Stage: Stage-1' +' Map Reduce' +' Map Operator Tree:' +' TableScan' +' alias: a' +' Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE' +' Filter Operator' +' predicate: key is not null (type: boolean)' +' Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE' +' Sorted Merge Bucket Map Join Operator' +' condition map:' +' Inner Join 0 to 1' +' keys:' +' 0 key (type: int)' +' 1 key (type: int)' +' outputColumnNames: _col0, _col1, _col5, _col6' +' Select Operator' +' expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)' +' outputColumnNames: _col0, _col1, _col2, _col3' +' File Output Operator' +' compressed: false' +' table:' +' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' +' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' +' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +'' +' Stage: Stage-0' +' Fetch Operator' +' limit: -1' +' Processor Tree:' +' ListSink' +'' +37 rows selected +>>> select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key; +INFO : Compiling commandqueryId=(!!{queryId}!!): select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:a.key, type:int, comment:null), FieldSchema(name:a.value, type:string, comment:null), FieldSchema(name:b.key, type:int, comment:null), FieldSchema(name:b.value, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: type: QUERY +INFO : PREHOOK: Input: smb_mapjoin_1@smb_bucket_1 +INFO : PREHOOK: Input: smb_mapjoin_1@smb_bucket_2 +INFO : PREHOOK: Output: file:/!!ELIDED!! +WARN : Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. +INFO : Query ID = !!{queryId}!! +INFO : Total jobs = 1 +INFO : Launching Job 1 out of 1 +INFO : Starting task [Stage-1:MAPRED] in serial mode +INFO : Number of reduce tasks is set to 0 since there's no reduce operator +DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir +DEBUG : adding the following namenodes' delegation tokens:[file:///] +DEBUG : Creating splits at file:/!!ELIDED!! +INFO : number of splits:1 +INFO : Submitting tokens for job: !!{jobId}}!! +INFO : The url to track the job: http://localhost:8080/ +INFO : Job running in-process (local Hadoop) +INFO : Ended Job = !!{jobId}!! +INFO : POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +INFO : POSTHOOK: type: QUERY +INFO : POSTHOOK: Input: smb_mapjoin_1@smb_bucket_1 +INFO : POSTHOOK: Input: smb_mapjoin_1@smb_bucket_2 +INFO : POSTHOOK: Output: file:/!!ELIDED!! +INFO : MapReduce Jobs Launched: +INFO : Stage-Stage-1: HDFS Read: 0 HDFS Write: 0 SUCCESS +INFO : Total MapReduce CPU Time Spent: 0 msec +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +'a.key','a.value','b.key','b.value' +No rows selected +>>> +>>> explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key; +INFO : Compiling commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:Explain, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: type: QUERY +INFO : Starting task [Stage-3:EXPLAIN] in serial mode +INFO : POSTHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +INFO : POSTHOOK: type: QUERY +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +'Explain' +'STAGE DEPENDENCIES:' +' Stage-1 is a root stage' +' Stage-0 depends on stages: Stage-1' +'' +'STAGE PLANS:' +' Stage: Stage-1' +' Map Reduce' +' Map Operator Tree:' +' TableScan' +' alias: a' +' Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE' +' Sorted Merge Bucket Map Join Operator' +' condition map:' +' Left Outer Join0 to 1' +' keys:' +' 0 key (type: int)' +' 1 key (type: int)' +' outputColumnNames: _col0, _col1, _col5, _col6' +' Select Operator' +' expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)' +' outputColumnNames: _col0, _col1, _col2, _col3' +' File Output Operator' +' compressed: false' +' table:' +' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' +' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' +' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +'' +' Stage: Stage-0' +' Fetch Operator' +' limit: -1' +' Processor Tree:' +' ListSink' +'' +34 rows selected +>>> select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key; +INFO : Compiling commandqueryId=(!!{queryId}!!): select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:a.key, type:int, comment:null), FieldSchema(name:a.value, type:string, comment:null), FieldSchema(name:b.key, type:int, comment:null), FieldSchema(name:b.value, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: type: QUERY +INFO : PREHOOK: Input: smb_mapjoin_1@smb_bucket_1 +INFO : PREHOOK: Input: smb_mapjoin_1@smb_bucket_2 +INFO : PREHOOK: Output: file:/!!ELIDED!! +WARN : Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. +INFO : Query ID = !!{queryId}!! +INFO : Total jobs = 1 +INFO : Launching Job 1 out of 1 +INFO : Starting task [Stage-1:MAPRED] in serial mode +INFO : Number of reduce tasks is set to 0 since there's no reduce operator +DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir +DEBUG : adding the following namenodes' delegation tokens:[file:///] +DEBUG : Creating splits at file:/!!ELIDED!! +INFO : number of splits:1 +INFO : Submitting tokens for job: !!{jobId}}!! +INFO : The url to track the job: http://localhost:8080/ +INFO : Job running in-process (local Hadoop) +INFO : Ended Job = !!{jobId}!! +INFO : POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +INFO : POSTHOOK: type: QUERY +INFO : POSTHOOK: Input: smb_mapjoin_1@smb_bucket_1 +INFO : POSTHOOK: Input: smb_mapjoin_1@smb_bucket_2 +INFO : POSTHOOK: Output: file:/!!ELIDED!! +INFO : MapReduce Jobs Launched: +INFO : Stage-Stage-1: HDFS Read: 0 HDFS Write: 0 SUCCESS +INFO : Total MapReduce CPU Time Spent: 0 msec +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +'a.key','a.value','b.key','b.value' +'1','val_1','NULL','NULL' +'3','val_3','NULL','NULL' +'4','val_4','NULL','NULL' +'5','val_5','NULL','NULL' +'10','val_10','NULL','NULL' +5 rows selected +>>> +>>> explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key; +INFO : Compiling commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:Explain, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: type: QUERY +INFO : Starting task [Stage-3:EXPLAIN] in serial mode +INFO : POSTHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +INFO : POSTHOOK: type: QUERY +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +'Explain' +'STAGE DEPENDENCIES:' +' Stage-1 is a root stage' +' Stage-0 depends on stages: Stage-1' +'' +'STAGE PLANS:' +' Stage: Stage-1' +' Map Reduce' +' Map Operator Tree:' +' TableScan' +' alias: a' +' Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE' +' Sorted Merge Bucket Map Join Operator' +' condition map:' +' Right Outer Join0 to 1' +' keys:' +' 0 key (type: int)' +' 1 key (type: int)' +' outputColumnNames: _col0, _col1, _col5, _col6' +' Select Operator' +' expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)' +' outputColumnNames: _col0, _col1, _col2, _col3' +' File Output Operator' +' compressed: false' +' table:' +' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' +' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' +' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +'' +' Stage: Stage-0' +' Fetch Operator' +' limit: -1' +' Processor Tree:' +' ListSink' +'' +34 rows selected +>>> select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key; +INFO : Compiling commandqueryId=(!!{queryId}!!): select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:a.key, type:int, comment:null), FieldSchema(name:a.value, type:string, comment:null), FieldSchema(name:b.key, type:int, comment:null), FieldSchema(name:b.value, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: type: QUERY +INFO : PREHOOK: Input: smb_mapjoin_1@smb_bucket_1 +INFO : PREHOOK: Input: smb_mapjoin_1@smb_bucket_2 +INFO : PREHOOK: Output: file:/!!ELIDED!! +WARN : Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. +INFO : Query ID = !!{queryId}!! +INFO : Total jobs = 1 +INFO : Launching Job 1 out of 1 +INFO : Starting task [Stage-1:MAPRED] in serial mode +INFO : Number of reduce tasks is set to 0 since there's no reduce operator +DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir +DEBUG : adding the following namenodes' delegation tokens:[file:///] +DEBUG : Creating splits at file:/!!ELIDED!! +INFO : number of splits:1 +INFO : Submitting tokens for job: !!{jobId}}!! +INFO : The url to track the job: http://localhost:8080/ +INFO : Job running in-process (local Hadoop) +INFO : Ended Job = !!{jobId}!! +INFO : POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +INFO : POSTHOOK: type: QUERY +INFO : POSTHOOK: Input: smb_mapjoin_1@smb_bucket_1 +INFO : POSTHOOK: Input: smb_mapjoin_1@smb_bucket_2 +INFO : POSTHOOK: Output: file:/!!ELIDED!! +INFO : MapReduce Jobs Launched: +INFO : Stage-Stage-1: HDFS Read: 0 HDFS Write: 0 SUCCESS +INFO : Total MapReduce CPU Time Spent: 0 msec +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +'a.key','a.value','b.key','b.value' +'NULL','NULL','20','val_20' +'NULL','NULL','23','val_23' +'NULL','NULL','25','val_25' +'NULL','NULL','30','val_30' +4 rows selected +>>> +>>> explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key; +INFO : Compiling commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:Explain, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: type: QUERY +INFO : Starting task [Stage-3:EXPLAIN] in serial mode +INFO : POSTHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +INFO : POSTHOOK: type: QUERY +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +'Explain' +'STAGE DEPENDENCIES:' +' Stage-1 is a root stage' +' Stage-0 depends on stages: Stage-1' +'' +'STAGE PLANS:' +' Stage: Stage-1' +' Map Reduce' +' Map Operator Tree:' +' TableScan' +' alias: a' +' Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE' +' Sorted Merge Bucket Map Join Operator' +' condition map:' +' Outer Join 0 to 1' +' keys:' +' 0 key (type: int)' +' 1 key (type: int)' +' outputColumnNames: _col0, _col1, _col5, _col6' +' Select Operator' +' expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)' +' outputColumnNames: _col0, _col1, _col2, _col3' +' File Output Operator' +' compressed: false' +' table:' +' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' +' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' +' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +'' +' Stage: Stage-0' +' Fetch Operator' +' limit: -1' +' Processor Tree:' +' ListSink' +'' +34 rows selected +>>> select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key; +INFO : Compiling commandqueryId=(!!{queryId}!!): select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:a.key, type:int, comment:null), FieldSchema(name:a.value, type:string, comment:null), FieldSchema(name:b.key, type:int, comment:null), FieldSchema(name:b.value, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +INFO : PREHOOK: type: QUERY +INFO : PREHOOK: Input: smb_mapjoin_1@smb_bucket_1 +INFO : PREHOOK: Input: smb_mapjoin_1@smb_bucket_2 +INFO : PREHOOK: Output: file:/!!ELIDED!! +WARN : Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. +INFO : Query ID = !!{queryId}!! +INFO : Total jobs = 1 +INFO : Launching Job 1 out of 1 +INFO : Starting task [Stage-1:MAPRED] in serial mode +INFO : Number of reduce tasks is set to 0 since there's no reduce operator +DEBUG : Configuring job !!{jobId}}!! with file:/!!ELIDED!! as the submit dir +DEBUG : adding the following namenodes' delegation tokens:[file:///] +DEBUG : Creating splits at file:/!!ELIDED!! +INFO : number of splits:1 +INFO : Submitting tokens for job: !!{jobId}}!! +INFO : The url to track the job: http://localhost:8080/ +INFO : Job running in-process (local Hadoop) +INFO : Ended Job = !!{jobId}!! +INFO : POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +INFO : POSTHOOK: type: QUERY +INFO : POSTHOOK: Input: smb_mapjoin_1@smb_bucket_1 +INFO : POSTHOOK: Input: smb_mapjoin_1@smb_bucket_2 +INFO : POSTHOOK: Output: file:/!!ELIDED!! +INFO : MapReduce Jobs Launched: +INFO : Stage-Stage-1: HDFS Read: 0 HDFS Write: 0 SUCCESS +INFO : Total MapReduce CPU Time Spent: 0 msec +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +'a.key','a.value','b.key','b.value' +'1','val_1','NULL','NULL' +'3','val_3','NULL','NULL' +'4','val_4','NULL','NULL' +'5','val_5','NULL','NULL' +'10','val_10','NULL','NULL' +'NULL','NULL','20','val_20' +'NULL','NULL','23','val_23' +'NULL','NULL','25','val_25' +'NULL','NULL','30','val_30' +9 rows selected +>>> +>>> +>>> +>>> +>>> +>>> !record
http://git-wip-us.apache.org/repos/asf/hive/blob/a2ce7f3d/ql/src/test/results/clientpositive/beeline/smb_mapjoin_10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_10.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_10.q.out new file mode 100644 index 0000000..bdfaefb --- /dev/null +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_10.q.out @@ -0,0 +1,248 @@ +>>> set hive.strict.checks.bucketing=false; +No rows affected +>>> +>>> +>>> create table tmp_smb_bucket_10(userid int, pageid int, postid int, type string) partitioned by (ds string) CLUSTERED BY (userid) SORTED BY (pageid, postid, type, userid) INTO 2 BUCKETS STORED AS RCFILE; +INFO : Compiling commandqueryId=(!!{queryId}!!): create table tmp_smb_bucket_10(userid int, pageid int, postid int, type string) partitioned by (ds string) CLUSTERED BY (userid) SORTED BY (pageid, postid, type, userid) INTO 2 BUCKETS STORED AS RCFILE +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): create table tmp_smb_bucket_10(userid int, pageid int, postid int, type string) partitioned by (ds string) CLUSTERED BY (userid) SORTED BY (pageid, postid, type, userid) INTO 2 BUCKETS STORED AS RCFILE +INFO : PREHOOK: query: create table tmp_smb_bucket_10(userid int, pageid int, postid int, type string) partitioned by (ds string) CLUSTERED BY (userid) SORTED BY (pageid, postid, type, userid) INTO 2 BUCKETS STORED AS RCFILE +INFO : PREHOOK: type: CREATETABLE +INFO : PREHOOK: Output: database:smb_mapjoin_10 +INFO : PREHOOK: Output: smb_mapjoin_10@tmp_smb_bucket_10 +INFO : Starting task [Stage-0:DDL] in serial mode +INFO : POSTHOOK: query: create table tmp_smb_bucket_10(userid int, pageid int, postid int, type string) partitioned by (ds string) CLUSTERED BY (userid) SORTED BY (pageid, postid, type, userid) INTO 2 BUCKETS STORED AS RCFILE +INFO : POSTHOOK: type: CREATETABLE +INFO : POSTHOOK: Output: database:smb_mapjoin_10 +INFO : POSTHOOK: Output: smb_mapjoin_10@tmp_smb_bucket_10 +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query create table tmp_smb_bucket_10(userid int, pageid int, postid int, type string) partitioned by (ds string) CLUSTERED BY (userid) SORTED BY (pageid, postid, type, userid) INTO 2 BUCKETS STORED AS RCFILE +No rows affected +>>> +>>> alter table tmp_smb_bucket_10 add partition (ds = '1'); +INFO : Compiling commandqueryId=(!!{queryId}!!): alter table tmp_smb_bucket_10 add partition (ds = '1') +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): alter table tmp_smb_bucket_10 add partition (ds = '1') +INFO : PREHOOK: query: alter table tmp_smb_bucket_10 add partition (ds = '1') +INFO : PREHOOK: type: ALTERTABLE_ADDPARTS +INFO : PREHOOK: Output: smb_mapjoin_10@tmp_smb_bucket_10 +INFO : Starting task [Stage-0:DDL] in serial mode +INFO : POSTHOOK: query: alter table tmp_smb_bucket_10 add partition (ds = '1') +INFO : POSTHOOK: type: ALTERTABLE_ADDPARTS +INFO : POSTHOOK: Output: smb_mapjoin_10@tmp_smb_bucket_10 +INFO : POSTHOOK: Output: smb_mapjoin_10@tmp_smb_bucket_10@ds=1 +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query alter table tmp_smb_bucket_10 add partition (ds = '1') +No rows affected +>>> alter table tmp_smb_bucket_10 add partition (ds = '2'); +INFO : Compiling commandqueryId=(!!{queryId}!!): alter table tmp_smb_bucket_10 add partition (ds = '2') +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): alter table tmp_smb_bucket_10 add partition (ds = '2') +INFO : PREHOOK: query: alter table tmp_smb_bucket_10 add partition (ds = '2') +INFO : PREHOOK: type: ALTERTABLE_ADDPARTS +INFO : PREHOOK: Output: smb_mapjoin_10@tmp_smb_bucket_10 +INFO : Starting task [Stage-0:DDL] in serial mode +INFO : POSTHOOK: query: alter table tmp_smb_bucket_10 add partition (ds = '2') +INFO : POSTHOOK: type: ALTERTABLE_ADDPARTS +INFO : POSTHOOK: Output: smb_mapjoin_10@tmp_smb_bucket_10 +INFO : POSTHOOK: Output: smb_mapjoin_10@tmp_smb_bucket_10@ds=2 +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query alter table tmp_smb_bucket_10 add partition (ds = '2') +No rows affected +>>> +>>> -- add dummy files to make sure that the number of files in each partition is same as number of buckets +>>> +>>> load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1'); +INFO : Compiling commandqueryId=(!!{queryId}!!): load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +INFO : PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +INFO : PREHOOK: type: LOAD +INFO : PREHOOK: Input: file:/!!ELIDED!! +INFO : PREHOOK: Output: smb_mapjoin_10@tmp_smb_bucket_10@ds=1 +INFO : Starting task [Stage-0:MOVE] in serial mode +INFO : Loading data to table smb_mapjoin_10.tmp_smb_bucket_10 partition (ds=1) from file:/!!ELIDED!! +INFO : Starting task [Stage-1:STATS] in serial mode +INFO : POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +INFO : POSTHOOK: type: LOAD +INFO : POSTHOOK: Input: file:/!!ELIDED!! +INFO : POSTHOOK: Output: smb_mapjoin_10@tmp_smb_bucket_10@ds=1 +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +No rows affected +>>> load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1'); +INFO : Compiling commandqueryId=(!!{queryId}!!): load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +INFO : PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +INFO : PREHOOK: type: LOAD +INFO : PREHOOK: Input: file:/!!ELIDED!! +INFO : PREHOOK: Output: smb_mapjoin_10@tmp_smb_bucket_10@ds=1 +INFO : Starting task [Stage-0:MOVE] in serial mode +INFO : Loading data to table smb_mapjoin_10.tmp_smb_bucket_10 partition (ds=1) from file:/!!ELIDED!! +INFO : Starting task [Stage-1:STATS] in serial mode +INFO : POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +INFO : POSTHOOK: type: LOAD +INFO : POSTHOOK: Input: file:/!!ELIDED!! +INFO : POSTHOOK: Output: smb_mapjoin_10@tmp_smb_bucket_10@ds=1 +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +No rows affected +>>> +>>> load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2'); +INFO : Compiling commandqueryId=(!!{queryId}!!): load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +INFO : PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +INFO : PREHOOK: type: LOAD +INFO : PREHOOK: Input: file:/!!ELIDED!! +INFO : PREHOOK: Output: smb_mapjoin_10@tmp_smb_bucket_10@ds=2 +INFO : Starting task [Stage-0:MOVE] in serial mode +INFO : Loading data to table smb_mapjoin_10.tmp_smb_bucket_10 partition (ds=2) from file:/!!ELIDED!! +INFO : Starting task [Stage-1:STATS] in serial mode +INFO : POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +INFO : POSTHOOK: type: LOAD +INFO : POSTHOOK: Input: file:/!!ELIDED!! +INFO : POSTHOOK: Output: smb_mapjoin_10@tmp_smb_bucket_10@ds=2 +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +No rows affected +>>> load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2'); +INFO : Compiling commandqueryId=(!!{queryId}!!): load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +INFO : PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +INFO : PREHOOK: type: LOAD +INFO : PREHOOK: Input: file:/!!ELIDED!! +INFO : PREHOOK: Output: smb_mapjoin_10@tmp_smb_bucket_10@ds=2 +INFO : Starting task [Stage-0:MOVE] in serial mode +INFO : Loading data to table smb_mapjoin_10.tmp_smb_bucket_10 partition (ds=2) from file:/!!ELIDED!! +INFO : Starting task [Stage-1:STATS] in serial mode +INFO : POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +INFO : POSTHOOK: type: LOAD +INFO : POSTHOOK: Input: file:/!!ELIDED!! +INFO : POSTHOOK: Output: smb_mapjoin_10@tmp_smb_bucket_10@ds=2 +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +No rows affected +>>> set hive.cbo.enable=false; +No rows affected +>>> set hive.optimize.bucketmapjoin = true; +No rows affected +>>> set hive.optimize.bucketmapjoin.sortedmerge = true; +No rows affected +>>> set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +No rows affected +>>> +>>> explain +select /*+mapjoin(a)*/ * from tmp_smb_bucket_10 a join tmp_smb_bucket_10 b +on (a.ds = '1' and b.ds = '2' and + a.userid = b.userid and + a.pageid = b.pageid and + a.postid = b.postid and + a.type = b.type); +INFO : Compiling commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(a)*/ * from tmp_smb_bucket_10 a join tmp_smb_bucket_10 b +on (a.ds = '1' and b.ds = '2' and + a.userid = b.userid and + a.pageid = b.pageid and + a.postid = b.postid and + a.type = b.type) +INFO : Semantic Analysis Completed +INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:Explain, type:string, comment:null)], properties:null) +INFO : Completed compiling commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : Executing commandqueryId=(!!{queryId}!!): explain +select /*+mapjoin(a)*/ * from tmp_smb_bucket_10 a join tmp_smb_bucket_10 b +on (a.ds = '1' and b.ds = '2' and + a.userid = b.userid and + a.pageid = b.pageid and + a.postid = b.postid and + a.type = b.type) +INFO : PREHOOK: query: explain +select /*+mapjoin(a)*/ * from tmp_smb_bucket_10 a join tmp_smb_bucket_10 b +on (a.ds = '1' and b.ds = '2' and + a.userid = b.userid and + a.pageid = b.pageid and + a.postid = b.postid and + a.type = b.type) +INFO : PREHOOK: type: QUERY +INFO : Starting task [Stage-3:EXPLAIN] in serial mode +INFO : POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from tmp_smb_bucket_10 a join tmp_smb_bucket_10 b +on (a.ds = '1' and b.ds = '2' and + a.userid = b.userid and + a.pageid = b.pageid and + a.postid = b.postid and + a.type = b.type) +INFO : POSTHOOK: type: QUERY +INFO : Completed executing commandqueryId=(!!{queryId}!!); Time taken: !!ELIDED!! seconds +INFO : OK +DEBUG : Shutting down query explain +select /*+mapjoin(a)*/ * from tmp_smb_bucket_10 a join tmp_smb_bucket_10 b +on (a.ds = '1' and b.ds = '2' and + a.userid = b.userid and + a.pageid = b.pageid and + a.postid = b.postid and + a.type = b.type) +'Explain' +'STAGE DEPENDENCIES:' +' Stage-1 is a root stage' +' Stage-0 depends on stages: Stage-1' +'' +'STAGE PLANS:' +' Stage: Stage-1' +' Map Reduce' +' Map Operator Tree:' +' TableScan' +' alias: b' +' Statistics: Num rows: 3 Data size: 414 Basic stats: COMPLETE Column stats: NONE' +' Filter Operator' +' predicate: (userid is not null and pageid is not null and postid is not null and type is not null) (type: boolean)' +' Statistics: Num rows: 3 Data size: 414 Basic stats: COMPLETE Column stats: NONE' +' Sorted Merge Bucket Map Join Operator' +' condition map:' +' Inner Join 0 to 1' +' keys:' +' 0 userid (type: int), pageid (type: int), postid (type: int), type (type: string)' +' 1 userid (type: int), pageid (type: int), postid (type: int), type (type: string)' +' outputColumnNames: _col0, _col1, _col2, _col3, _col8, _col9, _col10, _col11' +' Select Operator' +' expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string), '1' (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col11 (type: string), '2' (type: string)' +' outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9' +' File Output Operator' +' compressed: false' +' table:' +' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' +' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' +' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +'' +' Stage: Stage-0' +' Fetch Operator' +' limit: -1' +' Processor Tree:' +' ListSink' +'' +37 rows selected +>>> +>>> !record