HIVE-17139: Conditional expressions optimization: skip the expression evaluation if the condition is not satisfied for vectorization engine. (Jia Ke, reviewed by Ferdinand Xu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2c871e4a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2c871e4a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2c871e4a Branch: refs/heads/hive-14535 Commit: 2c871e4a75c844fd5a8eee061fe38f6a26ee38f0 Parents: f1050a6 Author: Ferdinand Xu <cheng.a...@intel.com> Authored: Thu Oct 12 09:22:37 2017 +0800 Committer: Ferdinand Xu <cheng.a...@intel.com> Committed: Thu Oct 12 09:22:37 2017 +0800 ---------------------------------------------------------------------- .../vector/expressions/IfExprColumnNull.java | 22 +- .../expressions/IfExprConditionalFilter.java | 192 ++++++++++ .../IfExprDoubleColumnDoubleColumn.java | 45 +-- .../IfExprIntervalDayTimeColumnColumn.java | 17 +- .../expressions/IfExprLongColumnLongColumn.java | 45 +-- .../vector/expressions/IfExprNullColumn.java | 16 +- ...fExprStringGroupColumnStringGroupColumn.java | 17 +- .../IfExprTimestampColumnColumnBase.java | 18 +- .../ql/exec/vector/udf/VectorUDFAdaptor.java | 14 +- .../queries/clientpositive/vectorized_case.q | 53 ++- .../clientpositive/llap/vectorized_case.q.out | 354 ++++++++++++++++++- .../clientpositive/spark/vectorized_case.q.out | 348 +++++++++++++++++- .../clientpositive/vectorized_case.q.out | 330 ++++++++++++++++- 13 files changed, 1283 insertions(+), 188 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/2c871e4a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java index 8cae274..93e12ad 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java @@ -22,27 +22,20 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -public class IfExprColumnNull extends VectorExpression { +public class IfExprColumnNull extends IfExprConditionalFilter { private static final long serialVersionUID = 1L; - private final int arg1Column; - private final int arg2Column; - private final int outputColumn; - public IfExprColumnNull(int arg1Column, int arg2Column, int outputColumn) { - this.arg1Column = arg1Column; - this.arg2Column = arg2Column; - this.outputColumn = outputColumn; + super(arg1Column, arg2Column, -1, outputColumn); } @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateChildren(batch); + super.evaluateIfConditionalExpr(batch, childExpressions); } - final LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; final ColumnVector arg2ColVector = batch.cols[arg2Column]; final ColumnVector outputColVector = batch.cols[outputColumn]; @@ -93,17 +86,8 @@ public class IfExprColumnNull extends VectorExpression { } @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override public String vectorExpressionParameters() { return "col " + arg1Column + ", col "+ arg2Column + ", null"; } - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - throw new UnsupportedOperationException("Undefined descriptor"); - } } http://git-wip-us.apache.org/repos/asf/hive/blob/2c871e4a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprConditionalFilter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprConditionalFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprConditionalFilter.java new file mode 100644 index 0000000..97cade7 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprConditionalFilter.java @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * For conditional expressions, the{@code IfExprConditionalFilter} class updated + * the selected array of batch parameter after the conditional expression is executed. + * Then the remaining expression will only do the selected rows instead of all. + */ +public class IfExprConditionalFilter extends VectorExpression { + protected int arg1Column = -1; + protected int arg2Column = -1; + protected int arg3Column = -1; + protected int outputColumn = -1; + protected int arg2ColumnTmp = -1; + + public IfExprConditionalFilter() { + } + + public IfExprConditionalFilter(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { + this.arg1Column = arg1Column; + if(arg2Column == -1){ + this.arg2Column = arg3Column; + this.arg2ColumnTmp = -1; + } else{ + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; + this.arg2ColumnTmp = arg2Column; + } + this.outputColumn = outputColumn; + } + + /** + * For If(expr1,expr2,expr3) expression, + * Firstly, save the previous selected vector, size and selectedInUse value of batch. + * Secondly evaluate the conditional expression and update the selected array of batch based + * on the result of conditional expression(1 denote done, 0 denote not done) + * Then evaluate the expr2 based on the updated selected. + * After the expr2 is executed, remove the indexes which have done in expr2. + * Last, evaluate the expr3 based on the updated selected. + * + * @param batch + * @param childExpressions the childExpressions need to be evaluated. + */ + public void evaluateIfConditionalExpr(VectorizedRowBatch batch, VectorExpression[] childExpressions) { + if (childExpressions != null) { + // Save the previous selected vector, size and selectedInUse value of batch. + int[] prevSelected = new int[batch.selected.length]; + int[] prevSelectedFalse = new int[batch.selected.length]; + int prevSize = batch.size; + boolean prevSelectInUse = batch.selectedInUse; + if (!batch.selectedInUse) { + for (int i = 0; i < batch.size; i++) { + prevSelected[i] = i; + } + System.arraycopy(batch.selected, 0, prevSelectedFalse, 0, batch.selected.length); + System.arraycopy(prevSelected, 0, batch.selected, 0, batch.size); + } else { + System.arraycopy(batch.selected, 0, prevSelected, 0, batch.selected.length); + } + + // Evaluate the conditional expression. + evaluateConditionalExpression(batch, childExpressions[0], + prevSize, prevSelectInUse); + if (childExpressions != null && childExpressions.length == 2) { + // If the length is 2, it has two situations:If(expr1,expr2,null) or + // If(expr1,null,expr3) distinguished by the indexes. + if (childExpressions[1].getOutputColumn() == arg2ColumnTmp) { + // Evaluate the expr2 expression. + childExpressions[1].evaluate(batch); + } else { + // Update the selected array of batch to remove the index of being done. + evaluateSelectedArray(batch, arg1Column, prevSelected, prevSize); + // If(expr1,null,expr3), if the expr1 is false, expr3 will be evaluated. + childExpressions[1].evaluate(batch); + } + } else if (childExpressions != null && childExpressions.length == 3) { + // IF(expr1,expr2,expr3). expr1,expr2,expr3 are all the expression. + // Evaluate the expr2 expression. + childExpressions[1].evaluate(batch); + // Update the selected array of batch to remove the index of being done. + evaluateSelectedArray(batch, arg1Column, prevSelected, prevSize); + // Evaluate the expr3 expression. + childExpressions[2].evaluate(batch); + } + // When evaluate all the expressions, restore the previous selected + // vector,size and selectedInUse value of batch. + batch.size = prevSize; + batch.selectedInUse = prevSelectInUse; + if(!prevSelectInUse){ + batch.selected = prevSelectedFalse; + } else{ + batch.selected = prevSelected; + } + } + } + + + /** + * Update the selected array of batch based on the conditional expression + * result, remove the index of being done. + * + * @param batch + * @param num the column num of conditional expression in batch cols + * @param prevSelected the previous selected array + */ + private static void evaluateSelectedArray(VectorizedRowBatch batch, int num, + int[] prevSelected, int prevSize) { + // Get the result of conditional expression. + LongColumnVector outputColVector = (LongColumnVector) batch.cols[num]; + long[] flag = outputColVector.vector; + int newSize = 0; + // Update the selected array of batch + for (int j = 0; j < prevSize; j++) { + if (flag[prevSelected[j]] == 0) { + batch.selected[newSize++] = prevSelected[j]; + } + } + batch.size = newSize; + batch.selectedInUse = true; + } + + /** + * Evaluate the conditional expression and update the selected array of batch + * based on the result of conditional expression. + * + * @param batch + * @param ve the conditional expression need to evaluate + * @param prevSize the previous batch size + * @param prevSelectInUse the previous selectInUse + */ + private static void evaluateConditionalExpression(VectorizedRowBatch batch, + VectorExpression ve, int prevSize, + boolean prevSelectInUse) { + batch.size = prevSize; + batch.selectedInUse = prevSelectInUse; + int colNum = ve.getOutputColumn(); + // Evaluate the conditional expression. + ve.evaluate(batch); + LongColumnVector outputColVector = (LongColumnVector) batch.cols[colNum]; + long[] flag = outputColVector.vector; + int[] sel = batch.selected; + int newSize = 0; + // Update the selected array of the batch based on the conditional expression. + for (int j = 0; j < batch.size; j++) { + int k = sel[j]; + if (flag[k] == 1) { + sel[newSize++] = k; + } + } + if(newSize < batch.size ) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + throw new UnsupportedOperationException("Undefined descriptor"); + } +} + http://git-wip-us.apache.org/repos/asf/hive/blob/2c871e4a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java index 514b453..0e50a78 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java @@ -27,18 +27,12 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; * The first is always a boolean (LongColumnVector). * The second and third are long columns or long expression results. */ -public class IfExprDoubleColumnDoubleColumn extends VectorExpression { +public class IfExprDoubleColumnDoubleColumn extends IfExprConditionalFilter { private static final long serialVersionUID = 1L; - private int arg1Column, arg2Column, arg3Column; - private int outputColumn; - public IfExprDoubleColumnDoubleColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { - this.arg1Column = arg1Column; - this.arg2Column = arg2Column; - this.arg3Column = arg3Column; - this.outputColumn = outputColumn; + super(arg1Column, arg2Column, arg3Column, outputColumn); } public IfExprDoubleColumnDoubleColumn() { @@ -48,7 +42,7 @@ public class IfExprDoubleColumnDoubleColumn extends VectorExpression { public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateChildren(batch); + super.evaluateIfConditionalExpr(batch, childExpressions); } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; @@ -129,43 +123,10 @@ public class IfExprDoubleColumnDoubleColumn extends VectorExpression { } @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override public String getOutputType() { return "double"; } - public int getArg1Column() { - return arg1Column; - } - - public void setArg1Column(int colNum) { - this.arg1Column = colNum; - } - - public int getArg2Column() { - return arg2Column; - } - - public void setArg2Column(int colNum) { - this.arg2Column = colNum; - } - - public int getArg3Column() { - return arg3Column; - } - - public void setArg3Column(int colNum) { - this.arg3Column = colNum; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - @Override public String vectorExpressionParameters() { return "col " + arg1Column + ", col "+ arg2Column + ", col "+ arg3Column; http://git-wip-us.apache.org/repos/asf/hive/blob/2c871e4a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java index 98fa29e..9627543 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java @@ -27,18 +27,12 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; * The first is always a boolean (LongColumnVector). * The second and third are long columns or long expression results. */ -public class IfExprIntervalDayTimeColumnColumn extends VectorExpression { +public class IfExprIntervalDayTimeColumnColumn extends IfExprConditionalFilter { private static final long serialVersionUID = 1L; - private int arg1Column, arg2Column, arg3Column; - private int outputColumn; - public IfExprIntervalDayTimeColumnColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { - this.arg1Column = arg1Column; - this.arg2Column = arg2Column; - this.arg3Column = arg3Column; - this.outputColumn = outputColumn; + super(arg1Column, arg2Column, arg3Column, outputColumn); } public IfExprIntervalDayTimeColumnColumn() { @@ -48,7 +42,7 @@ public class IfExprIntervalDayTimeColumnColumn extends VectorExpression { public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateChildren(batch); + super.evaluateIfConditionalExpr(batch, childExpressions); } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; @@ -126,11 +120,6 @@ public class IfExprIntervalDayTimeColumnColumn extends VectorExpression { } @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override public String getOutputType() { return "interval_day_time"; } http://git-wip-us.apache.org/repos/asf/hive/blob/2c871e4a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java index 4c6015e..744d8f6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java @@ -26,18 +26,12 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; * The first is always a boolean (LongColumnVector). * The second and third are long columns or long expression results. */ -public class IfExprLongColumnLongColumn extends VectorExpression { +public class IfExprLongColumnLongColumn extends IfExprConditionalFilter { private static final long serialVersionUID = 1L; - private int arg1Column, arg2Column, arg3Column; - private int outputColumn; - public IfExprLongColumnLongColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { - this.arg1Column = arg1Column; - this.arg2Column = arg2Column; - this.arg3Column = arg3Column; - this.outputColumn = outputColumn; + super(arg1Column, arg2Column, arg3Column, outputColumn); } public IfExprLongColumnLongColumn() { @@ -47,7 +41,7 @@ public class IfExprLongColumnLongColumn extends VectorExpression { public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateChildren(batch); + super.evaluateIfConditionalExpr(batch, childExpressions); } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; @@ -128,43 +122,10 @@ public class IfExprLongColumnLongColumn extends VectorExpression { } @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override public String getOutputType() { return "long"; } - public int getArg1Column() { - return arg1Column; - } - - public void setArg1Column(int colNum) { - this.arg1Column = colNum; - } - - public int getArg2Column() { - return arg2Column; - } - - public void setArg2Column(int colNum) { - this.arg2Column = colNum; - } - - public int getArg3Column() { - return arg3Column; - } - - public void setArg3Column(int colNum) { - this.arg3Column = colNum; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - @Override public String vectorExpressionParameters() { return "col " + arg1Column + ", col "+ arg2Column + ", col "+ arg3Column; http://git-wip-us.apache.org/repos/asf/hive/blob/2c871e4a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java index 156fcc4..842d620 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java @@ -22,25 +22,19 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -public class IfExprNullColumn extends VectorExpression { +public class IfExprNullColumn extends IfExprConditionalFilter { private static final long serialVersionUID = 1L; - private final int arg1Column; - private final int arg2Column; - private final int outputColumn; - public IfExprNullColumn(int arg1Column, int arg2Column, int outputColumn) { - this.arg1Column = arg1Column; - this.arg2Column = arg2Column; - this.outputColumn = outputColumn; + super(arg1Column, -1, arg2Column, outputColumn); } @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateChildren(batch); + super.evaluateIfConditionalExpr(batch, childExpressions); } final LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; @@ -102,8 +96,4 @@ public class IfExprNullColumn extends VectorExpression { return "col " + arg1Column + ", null, col "+ arg2Column; } - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - throw new UnsupportedOperationException("Undefined descriptor"); - } } http://git-wip-us.apache.org/repos/asf/hive/blob/2c871e4a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java index c8367c6..eae2046 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java @@ -29,18 +29,12 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; * The first is always a boolean (LongColumnVector). * The second and third are string columns or string expression results. */ -public class IfExprStringGroupColumnStringGroupColumn extends VectorExpression { +public class IfExprStringGroupColumnStringGroupColumn extends IfExprConditionalFilter { private static final long serialVersionUID = 1L; - private int arg1Column, arg2Column, arg3Column; - private int outputColumn; - public IfExprStringGroupColumnStringGroupColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { - this.arg1Column = arg1Column; - this.arg2Column = arg2Column; - this.arg3Column = arg3Column; - this.outputColumn = outputColumn; + super(arg1Column, arg2Column, arg3Column, outputColumn); } public IfExprStringGroupColumnStringGroupColumn() { @@ -51,7 +45,7 @@ public class IfExprStringGroupColumnStringGroupColumn extends VectorExpression { public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateChildren(batch); + super.evaluateIfConditionalExpr(batch, childExpressions); } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; @@ -167,11 +161,6 @@ public class IfExprStringGroupColumnStringGroupColumn extends VectorExpression { } @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override public String getOutputType() { return "String"; } http://git-wip-us.apache.org/repos/asf/hive/blob/2c871e4a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java index 8219b3c..b45259d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java @@ -26,18 +26,12 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; * The first is always a boolean (LongColumnVector). * The second and third are long columns or long expression results. */ -public abstract class IfExprTimestampColumnColumnBase extends VectorExpression { +public abstract class IfExprTimestampColumnColumnBase extends IfExprConditionalFilter { private static final long serialVersionUID = 1L; - private int arg1Column, arg2Column, arg3Column; - private int outputColumn; - public IfExprTimestampColumnColumnBase(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { - this.arg1Column = arg1Column; - this.arg2Column = arg2Column; - this.arg3Column = arg3Column; - this.outputColumn = outputColumn; + super(arg1Column, arg2Column, arg3Column, outputColumn); } public IfExprTimestampColumnColumnBase() { @@ -45,9 +39,8 @@ public abstract class IfExprTimestampColumnColumnBase extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { - if (childExpressions != null) { - super.evaluateChildren(batch); + super.evaluateIfConditionalExpr(batch, childExpressions); } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; @@ -125,11 +118,6 @@ public abstract class IfExprTimestampColumnColumnBase extends VectorExpression { } @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override public String getOutputType() { return "long"; } http://git-wip-us.apache.org/repos/asf/hive/blob/2c871e4a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java index 14ba646..7f91e5f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java @@ -31,9 +31,11 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprConditionalFilter; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; @@ -61,6 +63,7 @@ public class VectorUDFAdaptor extends VectorExpression { private String resultType; private VectorUDFArgDesc[] argDescs; private ExprNodeGenericFuncDesc expr; + private IfExprConditionalFilter cf; private transient GenericUDF genericUDF; private transient GenericUDF.DeferredObject[] deferredChildren; @@ -104,6 +107,11 @@ public class VectorUDFAdaptor extends VectorExpression { outputVectorAssignRow.init(outputTypeInfo, outputColumn); genericUDF.initialize(childrenOIs); + if((GenericUDFIf.class.getName()).equals(genericUDF.getUdfName())){ + cf = new IfExprConditionalFilter + (argDescs[0].getColumnNum(), argDescs[1].getColumnNum(), + argDescs[2].getColumnNum(), outputColumn); + } // Initialize constant arguments for (int i = 0; i < argDescs.length; i++) { @@ -125,7 +133,11 @@ public class VectorUDFAdaptor extends VectorExpression { } if (childExpressions != null) { - super.evaluateChildren(batch); + if ((GenericUDFIf.class.getName()).equals(genericUDF.getUdfName()) && cf != null) { + cf.evaluateIfConditionalExpr(batch, childExpressions); + } else { + super.evaluateChildren(batch); + } } int[] sel = batch.selected; http://git-wip-us.apache.org/repos/asf/hive/blob/2c871e4a/ql/src/test/queries/clientpositive/vectorized_case.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_case.q b/ql/src/test/queries/clientpositive/vectorized_case.q index 2f6810f..3c48607 100644 --- a/ql/src/test/queries/clientpositive/vectorized_case.q +++ b/ql/src/test/queries/clientpositive/vectorized_case.q @@ -56,20 +56,65 @@ or csmallint = 12205 or csmallint = 10583 ; explain vectorization expression -select +select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc; -select +select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc; explain vectorization expression -select +select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc; -select +select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc; + +-- add test for VectorUDFAdaptor call IfExprConditionalFilter +CREATE TABLE test_1 (member DECIMAL , attr DECIMAL) STORED AS ORC; + +INSERT INTO test_1 VALUES (3.0,1.0),(2.0,2.0),(1.0,3.0); +--for length=3 +EXPLAIN +SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1; + +SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1; + +--for length=2 and the expr2 is null +EXPLAIN +SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1; + +SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1; + +--for length=2 and the expr3 is null +EXPLAIN +SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1; + +SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1; + +-- add test for IF**.java call IfExprConditionalFilter +CREATE TABLE test_2 (member BIGINT, attr BIGINT) STORED AS ORC; + +INSERT INTO test_2 VALUES (3,1),(2,2),(1,3); + +--for length=3 +EXPLAIN +SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2; + +SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2; + +--for length=2 and the expression2 is null +EXPLAIN +SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2; + +SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2; + +--for length=2 and the expression3 is null +EXPLAIN +SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2; + +SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/2c871e4a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out index 5559c92..83c6624 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -241,13 +241,13 @@ STAGE PLANS: ListSink PREHOOK: query: explain vectorization expression -select +select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc PREHOOK: type: QUERY POSTHOOK: query: explain vectorization expression -select +select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc @@ -356,14 +356,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select +PREHOOK: query: select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select +POSTHOOK: query: select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc @@ -372,13 +372,13 @@ POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 5110 4607 PREHOOK: query: explain vectorization expression -select +select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc PREHOOK: type: QUERY POSTHOOK: query: explain vectorization expression -select +select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc @@ -487,14 +487,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select +PREHOOK: query: select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select +POSTHOOK: query: select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc @@ -502,3 +502,341 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 248718130534 1995744891643 +PREHOOK: query: CREATE TABLE test_1 (member DECIMAL , attr DECIMAL) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_1 +POSTHOOK: query: CREATE TABLE test_1 (member DECIMAL , attr DECIMAL) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_1 +PREHOOK: query: INSERT INTO test_1 VALUES (3.0,1.0),(2.0,2.0),(1.0,3.0) +PREHOOK: type: QUERY +PREHOOK: Output: default@test_1 +POSTHOOK: query: INSERT INTO test_1 VALUES (3.0,1.0),(2.0,2.0),(1.0,3.0) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@test_1 +POSTHOOK: Lineage: test_1.attr EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: test_1.member EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0)) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +3 +4 +4 +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0)) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +3 +4 +1 +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0)) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +2 +2 +4 +PREHOOK: query: CREATE TABLE test_2 (member BIGINT, attr BIGINT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_2 +POSTHOOK: query: CREATE TABLE test_2 (member BIGINT, attr BIGINT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_2 +PREHOOK: query: INSERT INTO test_2 VALUES (3,1),(2,2),(1,3) +PREHOOK: type: QUERY +PREHOOK: Output: default@test_2 +POSTHOOK: query: INSERT INTO test_2 VALUES (3,1),(2,2),(1,3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@test_2 +POSTHOOK: Lineage: test_2.attr EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: test_2.member EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_2 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_2 +#### A masked pattern was here #### +3 +4 +4 +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_2 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_2 +#### A masked pattern was here #### +3 +4 +NULL +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_2 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_2 +#### A masked pattern was here #### +NULL +NULL +4 http://git-wip-us.apache.org/repos/asf/hive/blob/2c871e4a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out index 24135d2..bb1bd19 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -239,13 +239,13 @@ STAGE PLANS: ListSink PREHOOK: query: explain vectorization expression -select +select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc PREHOOK: type: QUERY POSTHOOK: query: explain vectorization expression -select +select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc @@ -352,14 +352,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select +PREHOOK: query: select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select +POSTHOOK: query: select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc @@ -368,13 +368,13 @@ POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 5110 4607 PREHOOK: query: explain vectorization expression -select +select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc PREHOOK: type: QUERY POSTHOOK: query: explain vectorization expression -select +select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc @@ -481,14 +481,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select +PREHOOK: query: select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select +POSTHOOK: query: select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc @@ -496,3 +496,335 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 248718130534 1995744891643 +PREHOOK: query: CREATE TABLE test_1 (member DECIMAL , attr DECIMAL) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_1 +POSTHOOK: query: CREATE TABLE test_1 (member DECIMAL , attr DECIMAL) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_1 +PREHOOK: query: INSERT INTO test_1 VALUES (3.0,1.0),(2.0,2.0),(1.0,3.0) +PREHOOK: type: QUERY +PREHOOK: Output: default@test_1 +POSTHOOK: query: INSERT INTO test_1 VALUES (3.0,1.0),(2.0,2.0),(1.0,3.0) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@test_1 +POSTHOOK: Lineage: test_1.attr EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: test_1.member EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0)) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +3 +4 +4 +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0)) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +3 +4 +1 +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0)) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +2 +2 +4 +PREHOOK: query: CREATE TABLE test_2 (member BIGINT, attr BIGINT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_2 +POSTHOOK: query: CREATE TABLE test_2 (member BIGINT, attr BIGINT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_2 +PREHOOK: query: INSERT INTO test_2 VALUES (3,1),(2,2),(1,3) +PREHOOK: type: QUERY +PREHOOK: Output: default@test_2 +POSTHOOK: query: INSERT INTO test_2 VALUES (3,1),(2,2),(1,3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@test_2 +POSTHOOK: Lineage: test_2.attr EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: test_2.member EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_2 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_2 +#### A masked pattern was here #### +3 +4 +4 +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_2 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_2 +#### A masked pattern was here #### +3 +4 +NULL +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_2 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_2 +#### A masked pattern was here #### +NULL +NULL +4 http://git-wip-us.apache.org/repos/asf/hive/blob/2c871e4a/ql/src/test/results/clientpositive/vectorized_case.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vectorized_case.q.out b/ql/src/test/results/clientpositive/vectorized_case.q.out index 8646c0b..ba23230 100644 --- a/ql/src/test/results/clientpositive/vectorized_case.q.out +++ b/ql/src/test/results/clientpositive/vectorized_case.q.out @@ -233,13 +233,13 @@ STAGE PLANS: ListSink PREHOOK: query: explain vectorization expression -select +select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc PREHOOK: type: QUERY POSTHOOK: query: explain vectorization expression -select +select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc @@ -332,14 +332,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select +PREHOOK: query: select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select +POSTHOOK: query: select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc @@ -348,13 +348,13 @@ POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 5110 4607 PREHOOK: query: explain vectorization expression -select +select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc PREHOOK: type: QUERY POSTHOOK: query: explain vectorization expression -select +select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc @@ -447,14 +447,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select +PREHOOK: query: select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select +POSTHOOK: query: select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc @@ -462,3 +462,317 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 248718130534 1995744891643 +PREHOOK: query: CREATE TABLE test_1 (member DECIMAL , attr DECIMAL) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_1 +POSTHOOK: query: CREATE TABLE test_1 (member DECIMAL , attr DECIMAL) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_1 +PREHOOK: query: INSERT INTO test_1 VALUES (3.0,1.0),(2.0,2.0),(1.0,3.0) +PREHOOK: type: QUERY +PREHOOK: Output: default@test_1 +POSTHOOK: query: INSERT INTO test_1 VALUES (3.0,1.0),(2.0,2.0),(1.0,3.0) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@test_1 +POSTHOOK: Lineage: test_1.attr EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: test_1.member EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0)) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +3 +4 +4 +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0)) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +3 +4 +1 +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0)) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +2 +2 +4 +PREHOOK: query: CREATE TABLE test_2 (member BIGINT, attr BIGINT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_2 +POSTHOOK: query: CREATE TABLE test_2 (member BIGINT, attr BIGINT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_2 +PREHOOK: query: INSERT INTO test_2 VALUES (3,1),(2,2),(1,3) +PREHOOK: type: QUERY +PREHOOK: Output: default@test_2 +POSTHOOK: query: INSERT INTO test_2 VALUES (3,1),(2,2),(1,3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@test_2 +POSTHOOK: Lineage: test_2.attr EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: test_2.member EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_2 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_2 +#### A masked pattern was here #### +3 +4 +4 +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_2 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_2 +#### A masked pattern was here #### +3 +4 +NULL +PREHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_2 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_2 +#### A masked pattern was here #### +NULL +NULL +4