http://git-wip-us.apache.org/repos/asf/hive/blob/6f5c1135/ql/src/test/results/clientpositive/bucketmapjoin2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucketmapjoin2.q.out
b/ql/src/test/results/clientpositive/bucketmapjoin2.q.out
deleted file mode 100644
index 6809b12..0000000
--- a/ql/src/test/results/clientpositive/bucketmapjoin2.q.out
+++ /dev/null
@@ -1,1738 +0,0 @@
-PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string)
partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@srcbucket_mapjoin_part
-POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string)
partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@srcbucket_mapjoin_part
-PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO
TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@srcbucket_mapjoin_part
-POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt'
INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@srcbucket_mapjoin_part
-POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
-PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO
TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
-POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt'
INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
-PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO
TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
-POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt'
INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
-PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO
TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
-POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt'
INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
-PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string)
partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@srcbucket_mapjoin_part_2
-POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string)
partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@srcbucket_mapjoin_part_2
-PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO
TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@srcbucket_mapjoin_part_2
-POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt'
INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@srcbucket_mapjoin_part_2
-POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO
TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt'
INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-PREHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1
bigint, value2 bigint)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@bucketmapjoin_hash_result_1
-POSTHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1
bigint, value2 bigint)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@bucketmapjoin_hash_result_1
-PREHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1
bigint, value2 bigint)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@bucketmapjoin_hash_result_2
-POSTHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1
bigint, value2 bigint)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@bucketmapjoin_hash_result_2
-PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1
string, value2 string)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@bucketmapjoin_tmp_result
-POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1
string, value2 string)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@bucketmapjoin_tmp_result
-PREHOOK: query: explain extended
-insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(b)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key and b.ds="2008-04-08"
-PREHOOK: type: QUERY
-POSTHOOK: query: explain extended
-insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(b)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key and b.ds="2008-04-08"
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-9 is a root stage
- Stage-1 depends on stages: Stage-9
- Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
- Stage-4
- Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
- Stage-2 depends on stages: Stage-0
- Stage-3
- Stage-5
- Stage-6 depends on stages: Stage-5
-
-STAGE PLANS:
- Stage: Stage-9
- Map Reduce Local Work
- Alias -> Map Local Tables:
- b
- Fetch Operator
- limit: -1
- Partition Description:
- Partition
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- bucket_count 2
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
-#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- numFiles 2
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32
key, string value}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 3062
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count 2
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
-#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32
key, string value}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
- Alias -> Map Local Operator Tree:
- b
- TableScan
- alias: b
- Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE
Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE
Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 0
- Bucket Mapjoin Context:
- Alias Bucket Base File Name Mapping:
- b {ds=2008-04-08/srcbucket20.txt=[ds=2008-04-08/srcbucket22.txt],
ds=2008-04-08/srcbucket21.txt=[ds=2008-04-08/srcbucket23.txt],
ds=2008-04-08/srcbucket22.txt=[ds=2008-04-08/srcbucket22.txt],
ds=2008-04-08/srcbucket23.txt=[ds=2008-04-08/srcbucket23.txt]}
- Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
-
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE
Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE
Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col7
- Position of Big Table: 0
- Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE
Column stats: NONE
- BucketMapJoin: true
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col7
(type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 60 Data size: 6393 Basic stats:
COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 60 Data size: 6393 Basic stats:
COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 0
- numRows 0
- rawDataSize 0
- serialization.ddl struct bucketmapjoin_tmp_result {
string key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 0
-#### A masked pattern was here ####
- serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
- Local Work:
- Map Reduce Local Work
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-08
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
-#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part { i32 key,
string value}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
-#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part { i32 key,
string value}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part
- name: default.srcbucket_mapjoin_part
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part/ds=2008-04-08 [a]
-
- Stage: Stage-7
- Conditional Operator
-
- Stage: Stage-4
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
- Stage: Stage-0
- Move Operator
- tables:
- replace: true
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 0
- numRows 0
- rawDataSize 0
- serialization.ddl struct bucketmapjoin_tmp_result { string
key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 0
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
-
- Stage: Stage-2
- Stats-Aggr Operator
-#### A masked pattern was here ####
-
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- GatherStats: false
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 0
- numRows 0
- rawDataSize 0
- serialization.ddl struct bucketmapjoin_tmp_result { string
key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 0
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -ext-10002
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 0
- numRows 0
- rawDataSize 0
- serialization.ddl struct bucketmapjoin_tmp_result { string key,
string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 0
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 0
- numRows 0
- rawDataSize 0
- serialization.ddl struct bucketmapjoin_tmp_result { string
key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 0
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- name: default.bucketmapjoin_tmp_result
- Truncated Path -> Alias:
-#### A masked pattern was here ####
-
- Stage: Stage-5
- Map Reduce
- Map Operator Tree:
- TableScan
- GatherStats: false
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 0
- numRows 0
- rawDataSize 0
- serialization.ddl struct bucketmapjoin_tmp_result { string
key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 0
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -ext-10002
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 0
- numRows 0
- rawDataSize 0
- serialization.ddl struct bucketmapjoin_tmp_result { string key,
string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 0
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 0
- numRows 0
- rawDataSize 0
- serialization.ddl struct bucketmapjoin_tmp_result { string
key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 0
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- name: default.bucketmapjoin_tmp_result
- Truncated Path -> Alias:
-#### A masked pattern was here ####
-
- Stage: Stage-6
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
-PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(b)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key and b.ds="2008-04-08"
-PREHOOK: type: QUERY
-PREHOOK: Input: default@srcbucket_mapjoin_part
-PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
-PREHOOK: Input: default@srcbucket_mapjoin_part_2
-PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-PREHOOK: Output: default@bucketmapjoin_tmp_result
-POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(b)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key and b.ds="2008-04-08"
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@srcbucket_mapjoin_part
-POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
-POSTHOOK: Input: default@srcbucket_mapjoin_part_2
-POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-POSTHOOK: Output: default@bucketmapjoin_tmp_result
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE
[(srcbucket_mapjoin_part)a.FieldSchema(name:key, type:int, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE
[(srcbucket_mapjoin_part)a.FieldSchema(name:value, type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE
[(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string,
comment:null), ]
-PREHOOK: query: select count(1) from bucketmapjoin_tmp_result
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucketmapjoin_tmp_result
-#### A masked pattern was here ####
-POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucketmapjoin_tmp_result
-#### A masked pattern was here ####
-564
-PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from
bucketmapjoin_tmp_result
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucketmapjoin_tmp_result
-PREHOOK: Output: default@bucketmapjoin_hash_result_1
-POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from
bucketmapjoin_tmp_result
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucketmapjoin_tmp_result
-POSTHOOK: Output: default@bucketmapjoin_hash_result_1
-POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key,
type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1,
type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2,
type:string, comment:null), ]
-PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(b)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key and b.ds="2008-04-08"
-PREHOOK: type: QUERY
-PREHOOK: Input: default@srcbucket_mapjoin_part
-PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
-PREHOOK: Input: default@srcbucket_mapjoin_part_2
-PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-PREHOOK: Output: default@bucketmapjoin_tmp_result
-POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(b)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key and b.ds="2008-04-08"
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@srcbucket_mapjoin_part
-POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
-POSTHOOK: Input: default@srcbucket_mapjoin_part_2
-POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-POSTHOOK: Output: default@bucketmapjoin_tmp_result
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE
[(srcbucket_mapjoin_part)a.FieldSchema(name:key, type:int, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE
[(srcbucket_mapjoin_part)a.FieldSchema(name:value, type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE
[(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string,
comment:null), ]
-PREHOOK: query: select count(1) from bucketmapjoin_tmp_result
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucketmapjoin_tmp_result
-#### A masked pattern was here ####
-POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucketmapjoin_tmp_result
-#### A masked pattern was here ####
-564
-PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from
bucketmapjoin_tmp_result
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucketmapjoin_tmp_result
-PREHOOK: Output: default@bucketmapjoin_hash_result_2
-POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from
bucketmapjoin_tmp_result
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucketmapjoin_tmp_result
-POSTHOOK: Output: default@bucketmapjoin_hash_result_2
-POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key,
type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1,
type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2,
type:string, comment:null), ]
-PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2
-from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2
b
-on a.key = b.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucketmapjoin_hash_result_1
-PREHOOK: Input: default@bucketmapjoin_hash_result_2
-#### A masked pattern was here ####
-POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2
-from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2
b
-on a.key = b.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucketmapjoin_hash_result_1
-POSTHOOK: Input: default@bucketmapjoin_hash_result_2
-#### A masked pattern was here ####
-0 0 0
-PREHOOK: query: explain extended
-insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(a)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key and b.ds="2008-04-08"
-PREHOOK: type: QUERY
-POSTHOOK: query: explain extended
-insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(a)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key and b.ds="2008-04-08"
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-9 is a root stage
- Stage-1 depends on stages: Stage-9
- Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
- Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
-
-STAGE PLANS:
- Stage: Stage-9
- Map Reduce Local Work
- Alias -> Map Local Tables:
- a
- Fetch Operator
- limit: -1
- Partition Description:
- Partition
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
-#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part { i32 key,
string value}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
-#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part { i32
key, string value}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part
- name: default.srcbucket_mapjoin_part
- Alias -> Map Local Operator Tree:
- a
- TableScan
- alias: a
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE
Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE
Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 1
- Bucket Mapjoin Context:
- Alias Bucket Base File Name Mapping:
- a {ds=2008-04-08/srcbucket22.txt=[ds=2008-04-08/srcbucket20.txt,
ds=2008-04-08/srcbucket22.txt],
ds=2008-04-08/srcbucket23.txt=[ds=2008-04-08/srcbucket21.txt,
ds=2008-04-08/srcbucket23.txt]}
- Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
-
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE
Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE
Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col7
- Position of Big Table: 1
- Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE
Column stats: NONE
- BucketMapJoin: true
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col7
(type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 60 Data size: 6393 Basic stats:
COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 60 Data size: 6393 Basic stats:
COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result {
string key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
- Local Work:
- Map Reduce Local Work
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-08
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- bucket_count 2
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
-#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- numFiles 2
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key,
string value}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 3062
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count 2
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
-#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key,
string value}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
-
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
- Stage: Stage-0
- Move Operator
- tables:
- replace: true
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string
key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
-
- Stage: Stage-3
- Stats-Aggr Operator
-#### A masked pattern was here ####
-
- Stage: Stage-4
- Map Reduce
- Map Operator Tree:
- TableScan
- GatherStats: false
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string
key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -ext-10002
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string key,
string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string
key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- name: default.bucketmapjoin_tmp_result
- Truncated Path -> Alias:
-#### A masked pattern was here ####
-
- Stage: Stage-6
- Map Reduce
- Map Operator Tree:
- TableScan
- GatherStats: false
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string
key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -ext-10002
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string key,
string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string
key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- name: default.bucketmapjoin_tmp_result
- Truncated Path -> Alias:
-#### A masked pattern was here ####
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
-PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(a)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key and b.ds="2008-04-08"
-PREHOOK: type: QUERY
-PREHOOK: Input: default@srcbucket_mapjoin_part
-PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
-PREHOOK: Input: default@srcbucket_mapjoin_part_2
-PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-PREHOOK: Output: default@bucketmapjoin_tmp_result
-POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(a)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key and b.ds="2008-04-08"
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@srcbucket_mapjoin_part
-POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
-POSTHOOK: Input: default@srcbucket_mapjoin_part_2
-POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-POSTHOOK: Output: default@bucketmapjoin_tmp_result
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE
[(srcbucket_mapjoin_part)a.FieldSchema(name:key, type:int, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE
[(srcbucket_mapjoin_part)a.FieldSchema(name:value, type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE
[(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string,
comment:null), ]
-PREHOOK: query: select count(1) from bucketmapjoin_tmp_result
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucketmapjoin_tmp_result
-#### A masked pattern was here ####
-POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucketmapjoin_tmp_result
-#### A masked pattern was here ####
-564
-PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from
bucketmapjoin_tmp_result
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucketmapjoin_tmp_result
-PREHOOK: Output: default@bucketmapjoin_hash_result_1
-POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from
bucketmapjoin_tmp_result
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucketmapjoin_tmp_result
-POSTHOOK: Output: default@bucketmapjoin_hash_result_1
-POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key,
type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1,
type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2,
type:string, comment:null), ]
-PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(a)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key and b.ds="2008-04-08"
-PREHOOK: type: QUERY
-PREHOOK: Input: default@srcbucket_mapjoin_part
-PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
-PREHOOK: Input: default@srcbucket_mapjoin_part_2
-PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-PREHOOK: Output: default@bucketmapjoin_tmp_result
-POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(a)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key and b.ds="2008-04-08"
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@srcbucket_mapjoin_part
-POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
-POSTHOOK: Input: default@srcbucket_mapjoin_part_2
-POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-POSTHOOK: Output: default@bucketmapjoin_tmp_result
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE
[(srcbucket_mapjoin_part)a.FieldSchema(name:key, type:int, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE
[(srcbucket_mapjoin_part)a.FieldSchema(name:value, type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE
[(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string,
comment:null), ]
-PREHOOK: query: select count(1) from bucketmapjoin_tmp_result
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucketmapjoin_tmp_result
-#### A masked pattern was here ####
-POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucketmapjoin_tmp_result
-#### A masked pattern was here ####
-564
-PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from
bucketmapjoin_tmp_result
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucketmapjoin_tmp_result
-PREHOOK: Output: default@bucketmapjoin_hash_result_2
-POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from
bucketmapjoin_tmp_result
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucketmapjoin_tmp_result
-POSTHOOK: Output: default@bucketmapjoin_hash_result_2
-POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key,
type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1,
type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2,
type:string, comment:null), ]
-PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2
-from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2
b
-on a.key = b.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucketmapjoin_hash_result_1
-PREHOOK: Input: default@bucketmapjoin_hash_result_2
-#### A masked pattern was here ####
-POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2
-from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2
b
-on a.key = b.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucketmapjoin_hash_result_1
-POSTHOOK: Input: default@bucketmapjoin_hash_result_2
-#### A masked pattern was here ####
-0 0 0
-PREHOOK: query: -- HIVE-3210
-load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE
srcbucket_mapjoin_part_2 partition(ds='2008-04-09')
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@srcbucket_mapjoin_part_2
-POSTHOOK: query: -- HIVE-3210
-load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE
srcbucket_mapjoin_part_2 partition(ds='2008-04-09')
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@srcbucket_mapjoin_part_2
-POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09
-PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO
TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09')
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09
-POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt'
INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09')
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09
-PREHOOK: query: explain extended
-insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(b)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain extended
-insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(b)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-9 is a root stage
- Stage-1 depends on stages: Stage-9
- Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
- Stage-4
- Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
- Stage-2 depends on stages: Stage-0
- Stage-3
- Stage-5
- Stage-6 depends on stages: Stage-5
-
-STAGE PLANS:
- Stage: Stage-9
- Map Reduce Local Work
- Alias -> Map Local Tables:
- b
- Fetch Operator
- limit: -1
- Partition Description:
- Partition
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- bucket_count 2
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
-#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- numFiles 2
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32
key, string value}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 3062
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count 2
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
-#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32
key, string value}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
- Partition
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- properties:
- bucket_count 2
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
-#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- numFiles 2
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32
key, string value}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 3062
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count 2
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
-#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32
key, string value}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
- Alias -> Map Local Operator Tree:
- b
- TableScan
- alias: b
- Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE
Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE
Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 0
- Bucket Mapjoin Context:
- Alias Bucket Base File Name Mapping:
- b {ds=2008-04-08/srcbucket20.txt=[ds=2008-04-08/srcbucket22.txt,
ds=2008-04-09/srcbucket22.txt],
ds=2008-04-08/srcbucket21.txt=[ds=2008-04-08/srcbucket23.txt,
ds=2008-04-09/srcbucket23.txt],
ds=2008-04-08/srcbucket22.txt=[ds=2008-04-08/srcbucket22.txt,
ds=2008-04-09/srcbucket22.txt],
ds=2008-04-08/srcbucket23.txt=[ds=2008-04-08/srcbucket23.txt,
ds=2008-04-09/srcbucket23.txt]}
- Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
-
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE
Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE
Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col7
- Position of Big Table: 0
- Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE
Column stats: NONE
- BucketMapJoin: true
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col7
(type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 63 Data size: 6736 Basic stats:
COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 63 Data size: 6736 Basic stats:
COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result {
string key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
- Local Work:
- Map Reduce Local Work
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-08
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
-#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part { i32 key,
string value}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count 4
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
-#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part { i32 key,
string value}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part
- name: default.srcbucket_mapjoin_part
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part/ds=2008-04-08 [a]
-
- Stage: Stage-7
- Conditional Operator
-
- Stage: Stage-4
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
- Stage: Stage-0
- Move Operator
- tables:
- replace: true
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string
key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
-
- Stage: Stage-2
- Stats-Aggr Operator
-#### A masked pattern was here ####
-
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- GatherStats: false
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string
key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -ext-10002
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string key,
string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string
key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- name: default.bucketmapjoin_tmp_result
- Truncated Path -> Alias:
-#### A masked pattern was here ####
-
- Stage: Stage-5
- Map Reduce
- Map Operator Tree:
- TableScan
- GatherStats: false
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string
key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -ext-10002
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string key,
string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string
key, string value1, string value2}
- serialization.format 1
- serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- name: default.bucketmapjoin_tmp_result
- Truncated Path -> Alias:
-#### A masked pattern was here ####
-
- Stage: Stage-6
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
-PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(b)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@srcbucket_mapjoin_part
-PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
-PREHOOK: Input: default@srcbucket_mapjoin_part_2
-PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09
-PREHOOK: Output: default@bucketmapjoin_tmp_result
-POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(b)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@srcbucket_mapjoin_part
-POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
-POSTHOOK: Input: default@srcbucket_mapjoin_part_2
-POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09
-POSTHOOK: Output: default@bucketmapjoin_tmp_result
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE
[(srcbucket_mapjoin_part)a.FieldSchema(name:key, type:int, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE
[(srcbucket_mapjoin_part)a.FieldSchema(name:value, type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE
[(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string,
comment:null), ]
-PREHOOK: query: select count(1) from bucketmapjoin_tmp_result
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucketmapjoin_tmp_result
-#### A masked pattern was here ####
-POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucketmapjoin_tmp_result
-#### A masked pattern was here ####
-1128
-PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from
bucketmapjoin_tmp_result
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucketmapjoin_tmp_result
-PREHOOK: Output: default@bucketmapjoin_hash_result_1
-POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from
bucketmapjoin_tmp_result
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucketmapjoin_tmp_result
-POSTHOOK: Output: default@bucketmapjoin_hash_result_1
-POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key,
type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1,
type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2,
type:string, comment:null), ]
-PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(b)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@srcbucket_mapjoin_part
-PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
-PREHOOK: Input: default@srcbucket_mapjoin_part_2
-PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09
-PREHOOK: Output: default@bucketmapjoin_tmp_result
-POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result
-select /*+mapjoin(b)*/ a.key, a.value, b.value
-from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
-on a.key=b.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@srcbucket_mapjoin_part
-POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
-POSTHOOK: Input: default@srcbucket_mapjoin_part_2
-POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09
-POSTHOOK: Output: default@bucketmapjoin_tmp_result
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE
[(srcbucket_mapjoin_part)a.FieldSchema(name:key, type:int, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE
[(srcbucket_mapjoin_part)a.FieldSchema(name:value, type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE
[(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string,
comment:null), ]
-PREHOOK: query: select count(1) from bucketmapjoin_tmp_result
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucketmapjoin_tmp_result
-#### A masked pattern was here ####
-POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucketmapjoin_tmp_result
-#### A masked pattern was here ####
-1128
-PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from
bucketmapjoin_tmp_result
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucketmapjoin_tmp_result
-PREHOOK: Output: default@bucketmapjoin_hash_result_2
-POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2
-select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from
bucketmapjoin_tmp_result
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucketmapjoin_tmp_result
-POSTHOOK: Output: default@bucketmapjoin_hash_result_2
-POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key,
type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1,
type:string, comment:null), ]
-POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2,
type:string, comment:null), ]
-PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2
-from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2
b
-on a.key = b.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucketmapjoin_hash_result_1
-PREHOOK: Input: default@bucketmapjoin_hash_result_2
-#### A masked pattern was here ####
-POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2
-from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2
b
-on a.key = b.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucketmapjoin_hash_result_1
-POSTHOOK: Input: default@bucketmapjoin_hash_result_2
-#### A masked pattern was here ####
-0 0 0