This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/branch-3 by this push: new fa4c8305be6 HIVE-27666: Backport of HIVE-22903: Vectorized row_number() resets the row number after one batch in case of constant expression in partition clause (Shubham Chaurasia via Ramesh Kumar) fa4c8305be6 is described below commit fa4c8305be64ecc9510ab2bc76d2413e9287597a Author: Diksha628 <43694846+diksha...@users.noreply.github.com> AuthorDate: Tue Sep 12 17:25:54 2023 +0530 HIVE-27666: Backport of HIVE-22903: Vectorized row_number() resets the row number after one batch in case of constant expression in partition clause (Shubham Chaurasia via Ramesh Kumar) Signed-off-by: Sankar Hariappan <sank...@apache.org> Closes (#4661) --- .../hive/ql/exec/vector/ptf/VectorPTFOperator.java | 4 +- .../clientpositive/vector_windowing_row_number.q | 75 ++ .../vector_windowing_row_number.q.out | 912 +++++++++++++++++++++ 3 files changed, 989 insertions(+), 2 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java index 39fab2cba2b..f401cf7faef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java @@ -413,8 +413,8 @@ public class VectorPTFOperator extends Operator<PTFDesc> groupBatches.fillGroupResultsAndForward(this, batch); } - // If we are only processing a PARTITION BY, reset our evaluators. - if (!isPartitionOrderBy) { + // If we are only processing a PARTITION BY and isLastGroupBatch, reset our evaluators. + if (!isPartitionOrderBy && isLastGroupBatch) { groupBatches.resetEvaluators(); } } diff --git a/ql/src/test/queries/clientpositive/vector_windowing_row_number.q b/ql/src/test/queries/clientpositive/vector_windowing_row_number.q new file mode 100644 index 00000000000..673a9ad3d44 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_windowing_row_number.q @@ -0,0 +1,75 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.execution.ptf.enabled=true; +set hive.fetch.task.conversion=none; + +drop table row_number_test; + +create table row_number_test as select explode(split(repeat("w,", 2400), ",")); + +insert into row_number_test select explode(split(repeat("x,", 1200), ",")); + +insert into row_number_test select explode(split(repeat("y,", 700), ",")); + +insert into row_number_test select explode(split(repeat("z,", 600), ",")); + +explain select + row_number() over() as r1, + row_number() over(order by col) r2, + row_number() over(partition by col) r3, + row_number() over(partition by col order by col) r4, + row_number() over(partition by 1 order by col) r5, + row_number() over(partition by col order by 2) r6, + row_number() over(partition by 1 order by 2) r7, + col + from row_number_test; + +create table row_numbers_vectorized as select +row_number() over() as r1, +row_number() over(order by col) r2, +row_number() over(partition by col) r3, +row_number() over(partition by col order by col) r4, +row_number() over(partition by 1 order by col) r5, +row_number() over(partition by col order by 2) r6, +row_number() over(partition by 1 order by 2) r7, +col +from row_number_test; + +SET hive.vectorized.execution.enabled=false; +SET hive.vectorized.execution.reduce.enabled=false; +set hive.vectorized.execution.ptf.enabled=false; + +explain select + row_number() over() as r1, + row_number() over(order by col) r2, + row_number() over(partition by col) r3, + row_number() over(partition by col order by col) r4, + row_number() over(partition by 1 order by col) r5, + row_number() over(partition by col order by 2) r6, + row_number() over(partition by 1 order by 2) r7, + col + from row_number_test; + +create table row_numbers_non_vectorized as select +row_number() over() as r1, +row_number() over(order by col) r2, +row_number() over(partition by col) r3, +row_number() over(partition by col order by col) r4, +row_number() over(partition by 1 order by col) r5, +row_number() over(partition by col order by 2) r6, +row_number() over(partition by 1 order by 2) r7, +col +from row_number_test; + +-- compare results of vectorized with those of non-vectorized execution + +select exists( +select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_vectorized +minus +select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_non_vectorized +) diff_exists; + +drop table row_numbers_non_vectorized; +drop table row_numbers_vectorized; +drop table row_number_test; diff --git a/ql/src/test/results/clientpositive/vector_windowing_row_number.q.out b/ql/src/test/results/clientpositive/vector_windowing_row_number.q.out new file mode 100644 index 00000000000..17841991226 --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_windowing_row_number.q.out @@ -0,0 +1,912 @@ +PREHOOK: query: drop table row_number_test +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table row_number_test +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table row_number_test as select explode(split(repeat("w,", 2400), ",")) +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@row_number_test +POSTHOOK: query: create table row_number_test as select explode(split(repeat("w,", 2400), ",")) +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@row_number_test +POSTHOOK: Lineage: row_number_test.col SCRIPT [] +col +PREHOOK: query: insert into row_number_test select explode(split(repeat("x,", 1200), ",")) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@row_number_test +POSTHOOK: query: insert into row_number_test select explode(split(repeat("x,", 1200), ",")) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@row_number_test +POSTHOOK: Lineage: row_number_test.col SCRIPT [] +col +PREHOOK: query: insert into row_number_test select explode(split(repeat("y,", 700), ",")) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@row_number_test +POSTHOOK: query: insert into row_number_test select explode(split(repeat("y,", 700), ",")) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@row_number_test +POSTHOOK: Lineage: row_number_test.col SCRIPT [] +col +PREHOOK: query: insert into row_number_test select explode(split(repeat("z,", 600), ",")) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@row_number_test +POSTHOOK: query: insert into row_number_test select explode(split(repeat("z,", 600), ",")) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@row_number_test +POSTHOOK: Lineage: row_number_test.col SCRIPT [] +col +PREHOOK: query: explain select + row_number() over() as r1, + row_number() over(order by col) r2, + row_number() over(partition by col) r3, + row_number() over(partition by col order by col) r4, + row_number() over(partition by 1 order by col) r5, + row_number() over(partition by col order by 2) r6, + row_number() over(partition by 1 order by 2) r7, + col + from row_number_test +PREHOOK: type: QUERY +PREHOOK: Input: default@row_number_test +#### A masked pattern was here #### +POSTHOOK: query: explain select + row_number() over() as r1, + row_number() over(order by col) r2, + row_number() over(partition by col) r3, + row_number() over(partition by col order by col) r4, + row_number() over(partition by 1 order by col) r5, + row_number() over(partition by col order by 2) r6, + row_number() over(partition by 1 order by 2) r7, + col + from row_number_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@row_number_test +#### A masked pattern was here #### +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-7 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: row_number_test + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 0 (type: int) + sort order: + + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + value expressions: col (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 0 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: row_number_window_0 (type: int), _col0 (type: string) + outputColumnNames: row_number_window_0, _col0 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: 0 (type: int), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + value expressions: row_number_window_0 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: row_number_window_1 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: row_number_window_1 (type: int), _col0 (type: int), _col1 (type: string) + outputColumnNames: row_number_window_1, _col0, _col1 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + value expressions: row_number_window_1 (type: int), _col0 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: row_number_window_2 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: row_number_window_2 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: row_number_window_2, _col0, _col1, _col2 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + value expressions: row_number_window_2 (type: int), _col0 (type: int), _col1 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: int, _col3: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 ASC NULLS LAST + partition by: _col3 + raw input shape: + window functions: + window function definition + alias: row_number_window_3 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: row_number_window_3 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: row_number_window_3, _col0, _col1, _col2, _col3 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: 1 (type: int), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: 1 (type: int) + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + value expressions: row_number_window_3 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: int, _col3: int, _col4: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS LAST + partition by: 1 + raw input shape: + window functions: + window function definition + alias: row_number_window_4 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: row_number_window_4 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: string) + outputColumnNames: row_number_window_4, _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: string), 2 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + value expressions: row_number_window_4 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: int, _col3: int, _col4: int, _col5: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 2 ASC NULLS LAST + partition by: _col5 + raw input shape: + window functions: + window function definition + alias: row_number_window_5 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: row_number_window_5 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string) + outputColumnNames: row_number_window_5, _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: 1 (type: int), 2 (type: int) + sort order: ++ + Map-reduce partition columns: 1 (type: int) + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + value expressions: row_number_window_5 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: int, _col3: int, _col4: int, _col5: int, _col6: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 2 ASC NULLS LAST + partition by: 1 + raw input shape: + window functions: + window function definition + alias: row_number_window_6 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: int), _col4 (type: int), _col3 (type: int), _col2 (type: int), _col1 (type: int), _col0 (type: int), row_number_window_6 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: create table row_numbers_vectorized as select +row_number() over() as r1, +row_number() over(order by col) r2, +row_number() over(partition by col) r3, +row_number() over(partition by col order by col) r4, +row_number() over(partition by 1 order by col) r5, +row_number() over(partition by col order by 2) r6, +row_number() over(partition by 1 order by 2) r7, +col +from row_number_test +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@row_number_test +PREHOOK: Output: database:default +PREHOOK: Output: default@row_numbers_vectorized +POSTHOOK: query: create table row_numbers_vectorized as select +row_number() over() as r1, +row_number() over(order by col) r2, +row_number() over(partition by col) r3, +row_number() over(partition by col order by col) r4, +row_number() over(partition by 1 order by col) r5, +row_number() over(partition by col order by 2) r6, +row_number() over(partition by 1 order by 2) r7, +col +from row_number_test +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@row_number_test +POSTHOOK: Output: database:default +POSTHOOK: Output: default@row_numbers_vectorized +POSTHOOK: Lineage: row_numbers_vectorized.col SIMPLE [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_vectorized.r1 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_vectorized.r2 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_vectorized.r3 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_vectorized.r4 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_vectorized.r5 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_vectorized.r6 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_vectorized.r7 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +r1 r2 r3 r4 r5 r6 r7 col +PREHOOK: query: explain select + row_number() over() as r1, + row_number() over(order by col) r2, + row_number() over(partition by col) r3, + row_number() over(partition by col order by col) r4, + row_number() over(partition by 1 order by col) r5, + row_number() over(partition by col order by 2) r6, + row_number() over(partition by 1 order by 2) r7, + col + from row_number_test +PREHOOK: type: QUERY +PREHOOK: Input: default@row_number_test +#### A masked pattern was here #### +POSTHOOK: query: explain select + row_number() over() as r1, + row_number() over(order by col) r2, + row_number() over(partition by col) r3, + row_number() over(partition by col order by col) r4, + row_number() over(partition by 1 order by col) r5, + row_number() over(partition by col order by 2) r6, + row_number() over(partition by 1 order by 2) r7, + col + from row_number_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@row_number_test +#### A masked pattern was here #### +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-7 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: row_number_test + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 0 (type: int) + sort order: + + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + value expressions: col (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 0 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: row_number_window_0 (type: int), _col0 (type: string) + outputColumnNames: row_number_window_0, _col0 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: 0 (type: int), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + value expressions: row_number_window_0 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: row_number_window_1 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: row_number_window_1 (type: int), _col0 (type: int), _col1 (type: string) + outputColumnNames: row_number_window_1, _col0, _col1 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + value expressions: row_number_window_1 (type: int), _col0 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: row_number_window_2 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: row_number_window_2 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: row_number_window_2, _col0, _col1, _col2 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + value expressions: row_number_window_2 (type: int), _col0 (type: int), _col1 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: int, _col3: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 ASC NULLS LAST + partition by: _col3 + raw input shape: + window functions: + window function definition + alias: row_number_window_3 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: row_number_window_3 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: row_number_window_3, _col0, _col1, _col2, _col3 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: 1 (type: int), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: 1 (type: int) + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + value expressions: row_number_window_3 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: int, _col3: int, _col4: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS LAST + partition by: 1 + raw input shape: + window functions: + window function definition + alias: row_number_window_4 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: row_number_window_4 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: string) + outputColumnNames: row_number_window_4, _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: string), 2 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + value expressions: row_number_window_4 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: int, _col3: int, _col4: int, _col5: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 2 ASC NULLS LAST + partition by: _col5 + raw input shape: + window functions: + window function definition + alias: row_number_window_5 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: row_number_window_5 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string) + outputColumnNames: row_number_window_5, _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: 1 (type: int), 2 (type: int) + sort order: ++ + Map-reduce partition columns: 1 (type: int) + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + value expressions: row_number_window_5 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: int, _col3: int, _col4: int, _col5: int, _col6: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 2 ASC NULLS LAST + partition by: 1 + raw input shape: + window functions: + window function definition + alias: row_number_window_6 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: int), _col4 (type: int), _col3 (type: int), _col2 (type: int), _col1 (type: int), _col0 (type: int), row_number_window_6 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4904 Data size: 4900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: create table row_numbers_non_vectorized as select +row_number() over() as r1, +row_number() over(order by col) r2, +row_number() over(partition by col) r3, +row_number() over(partition by col order by col) r4, +row_number() over(partition by 1 order by col) r5, +row_number() over(partition by col order by 2) r6, +row_number() over(partition by 1 order by 2) r7, +col +from row_number_test +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@row_number_test +PREHOOK: Output: database:default +PREHOOK: Output: default@row_numbers_non_vectorized +POSTHOOK: query: create table row_numbers_non_vectorized as select +row_number() over() as r1, +row_number() over(order by col) r2, +row_number() over(partition by col) r3, +row_number() over(partition by col order by col) r4, +row_number() over(partition by 1 order by col) r5, +row_number() over(partition by col order by 2) r6, +row_number() over(partition by 1 order by 2) r7, +col +from row_number_test +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@row_number_test +POSTHOOK: Output: database:default +POSTHOOK: Output: default@row_numbers_non_vectorized +POSTHOOK: Lineage: row_numbers_non_vectorized.col SIMPLE [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_non_vectorized.r1 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_non_vectorized.r2 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_non_vectorized.r3 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_non_vectorized.r4 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_non_vectorized.r5 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_non_vectorized.r6 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_non_vectorized.r7 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +r1 r2 r3 r4 r5 r6 r7 col +Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: select exists( +select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_vectorized +minus +select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_non_vectorized +) diff_exists +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@row_numbers_non_vectorized +PREHOOK: Input: default@row_numbers_vectorized +#### A masked pattern was here #### +POSTHOOK: query: select exists( +select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_vectorized +minus +select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_non_vectorized +) diff_exists +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@row_numbers_non_vectorized +POSTHOOK: Input: default@row_numbers_vectorized +#### A masked pattern was here #### +diff_exists +false +PREHOOK: query: drop table row_numbers_non_vectorized +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@row_numbers_non_vectorized +PREHOOK: Output: default@row_numbers_non_vectorized +POSTHOOK: query: drop table row_numbers_non_vectorized +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@row_numbers_non_vectorized +POSTHOOK: Output: default@row_numbers_non_vectorized +PREHOOK: query: drop table row_numbers_vectorized +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@row_numbers_vectorized +PREHOOK: Output: default@row_numbers_vectorized +POSTHOOK: query: drop table row_numbers_vectorized +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@row_numbers_vectorized +POSTHOOK: Output: default@row_numbers_vectorized +PREHOOK: query: drop table row_number_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@row_number_test +PREHOOK: Output: default@row_number_test +POSTHOOK: query: drop table row_number_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@row_number_test +POSTHOOK: Output: default@row_number_test