http://git-wip-us.apache.org/repos/asf/hive/blob/22541610/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out deleted file mode 100644 index 1bfdba2..0000000 --- a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out +++ /dev/null @@ -1,890 +0,0 @@ -PREHOOK: query: -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - -CREATE TABLE src_4( - key STRING, - value STRING -) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@src_4 -POSTHOOK: query: -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - -CREATE TABLE src_4( - key STRING, - value STRING -) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@src_4 -RUN: Stage-0:DDL -PREHOOK: query: CREATE TABLE src_5( - key STRING, - value STRING -) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@src_5 -POSTHOOK: query: CREATE TABLE src_5( - key STRING, - value STRING -) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@src_5 -RUN: Stage-0:DDL -Warning: Shuffle Join JOIN[31][tables = [sq_2_notin_nullcheck]] in Work 'Reducer 2' is a cross product -PREHOOK: query: explain -from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -PREHOOK: type: QUERY -POSTHOOK: query: explain -from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-2 - Spark - Edges: - Reducer 2 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) - Reducer 9 <- Map 8 (GROUP, 1) - Reducer 4 <- Reducer 3 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 10 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) - Map 11 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 6 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Map 7 - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '2') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 8 - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '2') and key is null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col5 is null (type: boolean) - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - Reducer 9 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - - Stage: Stage-3 - Stats-Aggr Operator - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - - Stage: Stage-4 - Stats-Aggr Operator - -Warning: Shuffle Join JOIN[31][tables = [sq_2_notin_nullcheck]] in Work 'Reducer 2' is a cross product -PREHOOK: query: from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@src_4 -PREHOOK: Output: default@src_5 -POSTHOOK: query: from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@src_4 -POSTHOOK: Output: default@src_5 -POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-2:MAPRED -RUN: Stage-1:MOVE -RUN: Stage-0:MOVE -RUN: Stage-3:STATS -RUN: Stage-4:STATS -PREHOOK: query: select * from src_4 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_4 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_4 -#### A masked pattern was here #### -90 val_90 -90 val_90 -90 val_90 -92 val_92 -95 val_95 -95 val_95 -96 val_96 -97 val_97 -97 val_97 -98 val_98 -98 val_98 -PREHOOK: query: select * from src_5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_5 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_5 -#### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -10 val_10 -100 val_100 -100 val_100 -103 val_103 -103 val_103 -104 val_104 -104 val_104 -105 val_105 -11 val_11 -111 val_111 -113 val_113 -113 val_113 -114 val_114 -116 val_116 -118 val_118 -118 val_118 -119 val_119 -119 val_119 -119 val_119 -12 val_12 -12 val_12 -120 val_120 -120 val_120 -125 val_125 -125 val_125 -126 val_126 -128 val_128 -128 val_128 -128 val_128 -129 val_129 -129 val_129 -131 val_131 -133 val_133 -134 val_134 -134 val_134 -136 val_136 -137 val_137 -137 val_137 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -143 val_143 -145 val_145 -146 val_146 -146 val_146 -149 val_149 -149 val_149 -15 val_15 -15 val_15 -150 val_150 -152 val_152 -152 val_152 -153 val_153 -155 val_155 -156 val_156 -157 val_157 -158 val_158 -160 val_160 -162 val_162 -163 val_163 -164 val_164 -164 val_164 -165 val_165 -165 val_165 -166 val_166 -167 val_167 -167 val_167 -167 val_167 -168 val_168 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -17 val_17 -170 val_170 -172 val_172 -172 val_172 -174 val_174 -174 val_174 -175 val_175 -175 val_175 -176 val_176 -176 val_176 -177 val_177 -178 val_178 -179 val_179 -179 val_179 -18 val_18 -18 val_18 -180 val_180 -181 val_181 -183 val_183 -186 val_186 -187 val_187 -187 val_187 -187 val_187 -189 val_189 -19 val_19 -190 val_190 -191 val_191 -191 val_191 -192 val_192 -193 val_193 -193 val_193 -193 val_193 -194 val_194 -195 val_195 -195 val_195 -196 val_196 -197 val_197 -197 val_197 -199 val_199 -199 val_199 -199 val_199 -2 val_2 -Warning: Map Join MAPJOIN[46][bigTable=b] in task 'Stage-2:MAPRED' is a cross product -PREHOOK: query: explain -from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -PREHOOK: type: QUERY -POSTHOOK: query: explain -from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-5 - Spark - Edges: - Reducer 6 <- Map 5 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '2') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - Map 5 - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '2') and key is null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 6 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 - - Stage: Stage-2 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - input vertices: - 1 Reducer 6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5 - input vertices: - 1 Map 4 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col5 is null (type: boolean) - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - Local Work: - Map Reduce Local Work - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - - Stage: Stage-3 - Stats-Aggr Operator - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - - Stage: Stage-4 - Stats-Aggr Operator - -Warning: Map Join MAPJOIN[46][bigTable=b] in task 'Stage-2:MAPRED' is a cross product -PREHOOK: query: from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@src_4 -PREHOOK: Output: default@src_5 -POSTHOOK: query: from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@src_4 -POSTHOOK: Output: default@src_5 -POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-5:MAPRED -RUN: Stage-2:MAPRED -RUN: Stage-1:MOVE -RUN: Stage-0:MOVE -RUN: Stage-3:STATS -RUN: Stage-4:STATS -PREHOOK: query: select * from src_4 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_4 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_4 -#### A masked pattern was here #### -90 val_90 -90 val_90 -90 val_90 -92 val_92 -95 val_95 -95 val_95 -96 val_96 -97 val_97 -97 val_97 -98 val_98 -98 val_98 -PREHOOK: query: select * from src_5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_5 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_5 -#### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -10 val_10 -100 val_100 -100 val_100 -103 val_103 -103 val_103 -104 val_104 -104 val_104 -105 val_105 -11 val_11 -111 val_111 -113 val_113 -113 val_113 -114 val_114 -116 val_116 -118 val_118 -118 val_118 -119 val_119 -119 val_119 -119 val_119 -12 val_12 -12 val_12 -120 val_120 -120 val_120 -125 val_125 -125 val_125 -126 val_126 -128 val_128 -128 val_128 -128 val_128 -129 val_129 -129 val_129 -131 val_131 -133 val_133 -134 val_134 -134 val_134 -136 val_136 -137 val_137 -137 val_137 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -143 val_143 -145 val_145 -146 val_146 -146 val_146 -149 val_149 -149 val_149 -15 val_15 -15 val_15 -150 val_150 -152 val_152 -152 val_152 -153 val_153 -155 val_155 -156 val_156 -157 val_157 -158 val_158 -160 val_160 -162 val_162 -163 val_163 -164 val_164 -164 val_164 -165 val_165 -165 val_165 -166 val_166 -167 val_167 -167 val_167 -167 val_167 -168 val_168 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -17 val_17 -170 val_170 -172 val_172 -172 val_172 -174 val_174 -174 val_174 -175 val_175 -175 val_175 -176 val_176 -176 val_176 -177 val_177 -178 val_178 -179 val_179 -179 val_179 -18 val_18 -18 val_18 -180 val_180 -181 val_181 -183 val_183 -186 val_186 -187 val_187 -187 val_187 -187 val_187 -189 val_189 -19 val_19 -190 val_190 -191 val_191 -191 val_191 -192 val_192 -193 val_193 -193 val_193 -193 val_193 -194 val_194 -195 val_195 -195 val_195 -196 val_196 -197 val_197 -197 val_197 -199 val_199 -199 val_199 -199 val_199 -2 val_2
http://git-wip-us.apache.org/repos/asf/hive/blob/22541610/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out index 04dd9b4..d6df85a 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out @@ -73,8 +73,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) + Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 4), Map 6 (PARTITION-LEVEL SORT, 4) Reducer 9 <- Map 8 (GROUP, 1) Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### @@ -105,21 +105,21 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan @@ -235,19 +235,17 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: bigint) + keys: 0 (type: bigint) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Stage: Stage-1 Move Operator @@ -308,10 +306,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@src_4 POSTHOOK: Output: default@src_5 -POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] RUN: Stage-2:MAPRED RUN: Stage-1:MOVE RUN: Stage-0:MOVE @@ -514,16 +512,16 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 key (type: string), value (type: string) @@ -578,16 +576,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: bigint) + keys: 0 (type: bigint) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 @@ -730,10 +726,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@src_4 POSTHOOK: Output: default@src_5 -POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] RUN: Stage-5:MAPRED RUN: Stage-2:MAPRED RUN: Stage-1:MOVE http://git-wip-us.apache.org/repos/asf/hive/blob/22541610/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.7.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.7.out b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.7.out deleted file mode 100644 index 86b7544..0000000 --- a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.7.out +++ /dev/null @@ -1,217 +0,0 @@ -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE over1korc -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE over1korc -POSTHOOK: type: DROPTABLE -PREHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1k -POSTHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1k -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@over1k -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@over1k -PREHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1korc -POSTHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1korc -PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -PREHOOK: type: QUERY -PREHOOK: Input: default@over1k -PREHOOK: Output: default@over1korc -POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1k -POSTHOOK: Output: default@over1korc -POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ] -POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ] -POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ] -POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] -POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -POSTHOOK: Lineage: over1korc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ] -POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ] -POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ] -POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] -POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] -POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Reducer 2 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(50), avg(50.0), avg(50) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: struct<count:bigint,sum:double,input:double>), _col3 (type: struct<count:bigint,sum:decimal(12,0),input:decimal(10,0)>) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) - Reducer 3 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over1korc -#### A masked pattern was here #### -POSTHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1korc -#### A masked pattern was here #### -65536 50.0 50.0 50.0000 -65537 50.0 50.0 50.0000 -65538 50.0 50.0 50.0000 -65539 50.0 50.0 50.0000 -65540 50.0 50.0 50.0000 -65541 50.0 50.0 50.0000 -65542 50.0 50.0 50.0000 -65543 50.0 50.0 50.0000 -65544 50.0 50.0 50.0000 -65545 50.0 50.0 50.0000 http://git-wip-us.apache.org/repos/asf/hive/blob/22541610/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.8.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.8.out b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.8.out deleted file mode 100644 index 69f4754..0000000 --- a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.8.out +++ /dev/null @@ -1,203 +0,0 @@ -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE over1korc -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE over1korc -POSTHOOK: type: DROPTABLE -PREHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1k -POSTHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1k -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@over1k -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@over1k -PREHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1korc -POSTHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1korc -PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -PREHOOK: type: QUERY -PREHOOK: Input: default@over1k -PREHOOK: Output: default@over1korc -POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1k -POSTHOOK: Output: default@over1korc -POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ] -POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ] -POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ] -POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] -POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -POSTHOOK: Lineage: over1korc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ] -POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ] -POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ] -POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] -POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] -POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(50), avg(50.0), avg(50) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: struct<count:bigint,sum:double,input:double>), _col3 (type: struct<count:bigint,sum:decimal(12,0),input:decimal(10,0)>) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over1korc -#### A masked pattern was here #### -POSTHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1korc -#### A masked pattern was here #### -65636 50.0 50.0 50 -65550 50.0 50.0 50 -65592 50.0 50.0 50 -65744 50.0 50.0 50 -65722 50.0 50.0 50 -65668 50.0 50.0 50 -65598 50.0 50.0 50 -65596 50.0 50.0 50 -65568 50.0 50.0 50 -65738 50.0 50.0 50 http://git-wip-us.apache.org/repos/asf/hive/blob/22541610/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out index 63cdc24..0459d93 100644 --- a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out @@ -102,14 +102,14 @@ PREHOOK: query: EXPLAIN SELECT AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -119,7 +119,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 4) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -129,11 +130,11 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i (type: int) - outputColumnNames: i + outputColumnNames: _col0 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(50), avg(UDFToDouble(50)), avg(CAST( 50 AS decimal(10,0))) - keys: i (type: int) + aggregations: avg(50), avg(50.0), avg(50) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE @@ -152,6 +153,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE @@ -159,8 +173,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -174,7 +188,7 @@ PREHOOK: query: SELECT AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@over1korc #### A masked pattern was here #### @@ -183,17 +197,17 @@ POSTHOOK: query: SELECT AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@over1korc #### A masked pattern was here #### -65598 50.0 50.0 50 -65694 50.0 50.0 50 -65678 50.0 50.0 50 -65684 50.0 50.0 50 -65596 50.0 50.0 50 -65692 50.0 50.0 50 -65630 50.0 50.0 50 -65674 50.0 50.0 50 -65628 50.0 50.0 50 -65776 50.0 50.0 50 +65536 50.0 50.0 50.0000 +65537 50.0 50.0 50.0000 +65538 50.0 50.0 50.0000 +65539 50.0 50.0 50.0000 +65540 50.0 50.0 50.0000 +65541 50.0 50.0 50.0000 +65542 50.0 50.0 50.0000 +65543 50.0 50.0 50.0000 +65544 50.0 50.0 50.0000 +65545 50.0 50.0 50.0000 http://git-wip-us.apache.org/repos/asf/hive/blob/22541610/ql/src/test/results/clientpositive/stats_list_bucket.q.java1.7.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/stats_list_bucket.q.java1.7.out b/ql/src/test/results/clientpositive/stats_list_bucket.q.java1.7.out deleted file mode 100644 index a4908bc..0000000 --- a/ql/src/test/results/clientpositive/stats_list_bucket.q.java1.7.out +++ /dev/null @@ -1,191 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT - -drop table stats_list_bucket -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT - -drop table stats_list_bucket -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table stats_list_bucket_1 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table stats_list_bucket_1 -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table stats_list_bucket ( - c1 string, - c2 string -) partitioned by (ds string, hr string) -skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) -stored as directories -stored as rcfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@stats_list_bucket -POSTHOOK: query: create table stats_list_bucket ( - c1 string, - c2 string -) partitioned by (ds string, hr string) -skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) -stored as directories -stored as rcfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@stats_list_bucket -PREHOOK: query: -- Try partitioned table with list bucketing. --- The stats should show 500 rows loaded, as many rows as the src table has. - -insert overwrite table stats_list_bucket partition (ds = '2008-04-08', hr = '11') - select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@stats_list_bucket@ds=2008-04-08/hr=11 -POSTHOOK: query: -- Try partitioned table with list bucketing. --- The stats should show 500 rows loaded, as many rows as the src table has. - -insert overwrite table stats_list_bucket partition (ds = '2008-04-08', hr = '11') - select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@stats_list_bucket@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: stats_list_bucket PARTITION(ds=2008-04-08,hr=11).c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: stats_list_bucket PARTITION(ds=2008-04-08,hr=11).c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: desc formatted stats_list_bucket partition (ds = '2008-04-08', hr = '11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@stats_list_bucket -POSTHOOK: query: desc formatted stats_list_bucket partition (ds = '2008-04-08', hr = '11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@stats_list_bucket -# col_name data_type comment - -c1 string -c2 string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: stats_list_bucket -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 500 - rawDataSize 4812 - totalSize 5522 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [c1, c2] -Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[82, val_82]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=82/c2=val_82, [466, val_466]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=466/c2=val_466, [287, val_287]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=287/c2=val_287} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: -- Also try non-partitioned table with list bucketing. --- Stats should show the same number of rows. - -create table stats_list_bucket_1 ( - c1 string, - c2 string -) -skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) -stored as directories -stored as rcfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@stats_list_bucket_1 -POSTHOOK: query: -- Also try non-partitioned table with list bucketing. --- Stats should show the same number of rows. - -create table stats_list_bucket_1 ( - c1 string, - c2 string -) -skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) -stored as directories -stored as rcfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@stats_list_bucket_1 -PREHOOK: query: insert overwrite table stats_list_bucket_1 - select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@stats_list_bucket_1 -POSTHOOK: query: insert overwrite table stats_list_bucket_1 - select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@stats_list_bucket_1 -POSTHOOK: Lineage: stats_list_bucket_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: stats_list_bucket_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: desc formatted stats_list_bucket_1 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@stats_list_bucket_1 -POSTHOOK: query: desc formatted stats_list_bucket_1 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@stats_list_bucket_1 -# col_name data_type comment - -c1 string -c2 string - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 500 - rawDataSize 4812 - totalSize 5522 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [c1, c2] -Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[82, val_82]=/stats_list_bucket_1/c1=82/c2=val_82, [466, val_466]=/stats_list_bucket_1/c1=466/c2=val_466, [287, val_287]=/stats_list_bucket_1/c1=287/c2=val_287} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table stats_list_bucket -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@stats_list_bucket -PREHOOK: Output: default@stats_list_bucket -POSTHOOK: query: drop table stats_list_bucket -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@stats_list_bucket -POSTHOOK: Output: default@stats_list_bucket -PREHOOK: query: drop table stats_list_bucket_1 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@stats_list_bucket_1 -PREHOOK: Output: default@stats_list_bucket_1 -POSTHOOK: query: drop table stats_list_bucket_1 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@stats_list_bucket_1 -POSTHOOK: Output: default@stats_list_bucket_1 http://git-wip-us.apache.org/repos/asf/hive/blob/22541610/ql/src/test/results/clientpositive/stats_list_bucket.q.java1.8.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/stats_list_bucket.q.java1.8.out b/ql/src/test/results/clientpositive/stats_list_bucket.q.java1.8.out deleted file mode 100644 index 8688cee..0000000 --- a/ql/src/test/results/clientpositive/stats_list_bucket.q.java1.8.out +++ /dev/null @@ -1,193 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT - -drop table stats_list_bucket -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT - -drop table stats_list_bucket -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table stats_list_bucket_1 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table stats_list_bucket_1 -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table stats_list_bucket ( - c1 string, - c2 string -) partitioned by (ds string, hr string) -skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) -stored as directories -stored as rcfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@stats_list_bucket -POSTHOOK: query: create table stats_list_bucket ( - c1 string, - c2 string -) partitioned by (ds string, hr string) -skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) -stored as directories -stored as rcfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@stats_list_bucket -PREHOOK: query: -- Make sure we use hashed IDs during stats publishing. --- Try partitioned table with list bucketing. --- The stats should show 500 rows loaded, as many rows as the src table has. - -insert overwrite table stats_list_bucket partition (ds = '2008-04-08', hr = '11') - select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@stats_list_bucket@ds=2008-04-08/hr=11 -POSTHOOK: query: -- Make sure we use hashed IDs during stats publishing. --- Try partitioned table with list bucketing. --- The stats should show 500 rows loaded, as many rows as the src table has. - -insert overwrite table stats_list_bucket partition (ds = '2008-04-08', hr = '11') - select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@stats_list_bucket@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: stats_list_bucket PARTITION(ds=2008-04-08,hr=11).c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: stats_list_bucket PARTITION(ds=2008-04-08,hr=11).c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: desc formatted stats_list_bucket partition (ds = '2008-04-08', hr = '11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@stats_list_bucket -POSTHOOK: query: desc formatted stats_list_bucket partition (ds = '2008-04-08', hr = '11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@stats_list_bucket -# col_name data_type comment - -c1 string -c2 string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: stats_list_bucket -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 4 - numRows 500 - rawDataSize 4812 - totalSize 5522 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [c1, c2] -Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[466, val_466]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=466/c2=val_466, [287, val_287]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=287/c2=val_287, [82, val_82]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=82/c2=val_82} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: -- Also try non-partitioned table with list bucketing. --- Stats should show the same number of rows. - -create table stats_list_bucket_1 ( - c1 string, - c2 string -) -skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) -stored as directories -stored as rcfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@stats_list_bucket_1 -POSTHOOK: query: -- Also try non-partitioned table with list bucketing. --- Stats should show the same number of rows. - -create table stats_list_bucket_1 ( - c1 string, - c2 string -) -skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) -stored as directories -stored as rcfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@stats_list_bucket_1 -PREHOOK: query: insert overwrite table stats_list_bucket_1 - select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@stats_list_bucket_1 -POSTHOOK: query: insert overwrite table stats_list_bucket_1 - select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@stats_list_bucket_1 -POSTHOOK: Lineage: stats_list_bucket_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: stats_list_bucket_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: desc formatted stats_list_bucket_1 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@stats_list_bucket_1 -POSTHOOK: query: desc formatted stats_list_bucket_1 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@stats_list_bucket_1 -# col_name data_type comment - -c1 string -c2 string - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE true - numFiles 4 - numRows 500 - rawDataSize 4812 - totalSize 5522 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [c1, c2] -Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[466, val_466]=/stats_list_bucket_1/c1=466/c2=val_466, [82, val_82]=/stats_list_bucket_1/c1=82/c2=val_82, [287, val_287]=/stats_list_bucket_1/c1=287/c2=val_287} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table stats_list_bucket -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@stats_list_bucket -PREHOOK: Output: default@stats_list_bucket -POSTHOOK: query: drop table stats_list_bucket -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@stats_list_bucket -POSTHOOK: Output: default@stats_list_bucket -PREHOOK: query: drop table stats_list_bucket_1 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@stats_list_bucket_1 -PREHOOK: Output: default@stats_list_bucket_1 -POSTHOOK: query: drop table stats_list_bucket_1 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@stats_list_bucket_1 -POSTHOOK: Output: default@stats_list_bucket_1