http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out b/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out new file mode 100644 index 0000000..16803c9 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out @@ -0,0 +1,592 @@ +PREHOOK: query: explain vectorization detail +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 4] + selectExpressions: CastStringToLong(col 3:string)(children: StringSubstrColStart(col 1:string, start 4) -> 3:string) -> 4:int + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string + native: true + Group By Operator + aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 4:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:string, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0 +10 10 +100 200 +103 206 +104 208 +PREHOOK: query: explain vectorization detail +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: key (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string + native: true + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +10 +100 +103 +104 +PREHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0
http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out b/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index e79cdf7..f7c00f8 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -491,31 +491,40 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0] Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:tinyint - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] + Top N Key Operator + sort order: + keys: ctinyint (type: tinyint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + top n: 20 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:tinyint + native: true + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: ctinyint (type: tinyint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0] + valueColumnNums: [] + Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -560,19 +569,19 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/perf/tez/query10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/query10.q.out b/ql/src/test/results/clientpositive/perf/tez/query10.q.out index 1b6adee..a8f097f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query10.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query10.q.out @@ -140,188 +140,190 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_224] - Limit [LIM_223] (rows=100 width=88) + File Output Operator [FS_225] + Limit [LIM_224] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_222] (rows=1045432122 width=88) + Select Operator [SEL_223] (rows=1045432122 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_221] - Select Operator [SEL_220] (rows=1045432122 width=88) + SHUFFLE [RS_222] + Select Operator [SEL_221] (rows=1045432122 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col12"] - Group By Operator [GBY_219] (rows=1045432122 width=88) + Group By Operator [GBY_220] (rows=1045432122 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Group By Operator [GBY_62] (rows=2090864244 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Select Operator [SEL_61] (rows=2090864244 width=88) - Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Filter Operator [FIL_60] (rows=2090864244 width=88) - predicate:(_col15 is not null or _col17 is not null) - Merge Join Operator [MERGEJOIN_172] (rows=2090864244 width=88) - Conds:RS_55._col0=RS_56._col0(Left Semi),RS_55._col0=RS_210._col0(Left Outer),RS_55._col0=RS_218._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"] - <-Reducer 3 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_55] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_168] (rows=96800003 width=860) - Conds:RS_50._col1=RS_181._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_181] - PartitionCols:_col0 - Select Operator [SEL_180] (rows=1861800 width=385) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_179] (rows=1861800 width=385) - predicate:cd_demo_sk is not null - TableScan [TS_6] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_50] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_167] (rows=88000001 width=860) - Conds:RS_175._col2=RS_178._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_175] - PartitionCols:_col2 - Select Operator [SEL_174] (rows=80000000 width=860) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_173] (rows=80000000 width=860) - predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_178] - PartitionCols:_col0 - Select Operator [SEL_177] (rows=20000000 width=1014) - Output:["_col0"] - Filter Operator [FIL_176] (rows=20000000 width=1014) - predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null) - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:_col0 - Group By Operator [GBY_54] (rows=633595212 width=88) - Output:["_col0"],keys:_col0 - Select Operator [SEL_18] (rows=633595212 width=88) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_169] (rows=633595212 width=88) - Conds:RS_202._col0=RS_184._col0(Inner),Output:["_col1"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_184] + Top N Key Operator [TNK_103] (rows=2090864244 width=88) + keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13,sort order:++++++++,top n:100 + Select Operator [SEL_61] (rows=2090864244 width=88) + Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Filter Operator [FIL_60] (rows=2090864244 width=88) + predicate:(_col15 is not null or _col17 is not null) + Merge Join Operator [MERGEJOIN_173] (rows=2090864244 width=88) + Conds:RS_55._col0=RS_56._col0(Left Semi),RS_55._col0=RS_211._col0(Left Outer),RS_55._col0=RS_219._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"] + <-Reducer 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_55] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_169] (rows=96800003 width=860) + Conds:RS_50._col1=RS_182._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_182] + PartitionCols:_col0 + Select Operator [SEL_181] (rows=1861800 width=385) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_180] (rows=1861800 width=385) + predicate:cd_demo_sk is not null + TableScan [TS_6] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_168] (rows=88000001 width=860) + Conds:RS_176._col2=RS_179._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_176] + PartitionCols:_col2 + Select Operator [SEL_175] (rows=80000000 width=860) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_174] (rows=80000000 width=860) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) + TableScan [TS_0] (rows=80000000 width=860) + default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_179] + PartitionCols:_col0 + Select Operator [SEL_178] (rows=20000000 width=1014) + Output:["_col0"] + Filter Operator [FIL_177] (rows=20000000 width=1014) + predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null) + TableScan [TS_3] (rows=40000000 width=1014) + default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_56] + PartitionCols:_col0 + Group By Operator [GBY_54] (rows=633595212 width=88) + Output:["_col0"],keys:_col0 + Select Operator [SEL_18] (rows=633595212 width=88) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_170] (rows=633595212 width=88) + Conds:RS_203._col0=RS_185._col0(Inner),Output:["_col1"] + <-Map 12 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_185] + PartitionCols:_col0 + Select Operator [SEL_184] (rows=4058 width=1119) + Output:["_col0"] + Filter Operator [FIL_183] (rows=4058 width=1119) + predicate:((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) + TableScan [TS_12] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_203] + PartitionCols:_col0 + Select Operator [SEL_202] (rows=575995635 width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_201] (rows=575995635 width=88) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_55_c_c_customer_sk_min) AND DynamicValue(RS_55_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_55_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_9] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_198] + Group By Operator [GBY_197] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_194] + Group By Operator [GBY_191] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_186] (rows=4058 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_184] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_200] + Group By Operator [GBY_199] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=96800000)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_136] + Group By Operator [GBY_135] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=96800000)"] + Select Operator [SEL_134] (rows=96800003 width=860) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_169] + <-Reducer 15 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_211] + PartitionCols:_col0 + Select Operator [SEL_210] (rows=79201469 width=135) + Output:["_col0","_col1"] + Group By Operator [GBY_209] (rows=79201469 width=135) + Output:["_col0"],keys:KEY._col0 + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col0 - Select Operator [SEL_183] (rows=4058 width=1119) - Output:["_col0"] - Filter Operator [FIL_182] (rows=4058 width=1119) - predicate:((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) - TableScan [TS_12] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_202] + Group By Operator [GBY_29] (rows=158402938 width=135) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_171] (rows=158402938 width=135) + Conds:RS_208._col0=RS_187._col0(Inner),Output:["_col1"] + <-Map 12 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_187] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_184] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_208] + PartitionCols:_col0 + Select Operator [SEL_207] (rows=144002668 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_206] (rows=144002668 width=135) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_19] (rows=144002668 width=135) + default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_205] + Group By Operator [GBY_204] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_195] + Group By Operator [GBY_192] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_188] (rows=4058 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_184] + <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_219] + PartitionCols:_col0 + Select Operator [SEL_218] (rows=158394413 width=135) + Output:["_col0","_col1"] + Group By Operator [GBY_217] (rows=158394413 width=135) + Output:["_col0"],keys:KEY._col0 + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_44] PartitionCols:_col0 - Select Operator [SEL_201] (rows=575995635 width=88) - Output:["_col0","_col1"] - Filter Operator [FIL_200] (rows=575995635 width=88) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_55_c_c_customer_sk_min) AND DynamicValue(RS_55_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_55_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_9] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_197] - Group By Operator [GBY_196] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_193] - Group By Operator [GBY_190] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_185] (rows=4058 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_183] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_199] - Group By Operator [GBY_198] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=96800000)"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_135] - Group By Operator [GBY_134] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=96800000)"] - Select Operator [SEL_133] (rows=96800003 width=860) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_168] - <-Reducer 15 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_210] - PartitionCols:_col0 - Select Operator [SEL_209] (rows=79201469 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_208] (rows=79201469 width=135) - Output:["_col0"],keys:KEY._col0 - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col0 - Group By Operator [GBY_29] (rows=158402938 width=135) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_170] (rows=158402938 width=135) - Conds:RS_207._col0=RS_186._col0(Inner),Output:["_col1"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_186] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_183] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_207] - PartitionCols:_col0 - Select Operator [SEL_206] (rows=144002668 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_205] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_19] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_204] - Group By Operator [GBY_203] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_194] - Group By Operator [GBY_191] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_187] (rows=4058 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_183] - <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_218] - PartitionCols:_col0 - Select Operator [SEL_217] (rows=158394413 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_216] (rows=158394413 width=135) - Output:["_col0"],keys:KEY._col0 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col0 - Group By Operator [GBY_43] (rows=316788826 width=135) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_171] (rows=316788826 width=135) - Conds:RS_215._col0=RS_188._col0(Inner),Output:["_col1"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_188] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_183] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_215] - PartitionCols:_col0 - Select Operator [SEL_214] (rows=287989836 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_213] (rows=287989836 width=135) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_33] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_212] - Group By Operator [GBY_211] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_195] - Group By Operator [GBY_192] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_189] (rows=4058 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_183] + Group By Operator [GBY_43] (rows=316788826 width=135) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_172] (rows=316788826 width=135) + Conds:RS_216._col0=RS_189._col0(Inner),Output:["_col1"] + <-Map 12 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_189] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_184] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_216] + PartitionCols:_col0 + Select Operator [SEL_215] (rows=287989836 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_214] (rows=287989836 width=135) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_33] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_213] + Group By Operator [GBY_212] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_196] + Group By Operator [GBY_193] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_190] (rows=4058 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_184]
