Repository: hive Updated Branches: refs/heads/master 4cb87670e -> 0e62d3dcb
HIVE-15978: Support regr_* functions (Zoltan Haindrich, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0e62d3dc Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0e62d3dc Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0e62d3dc Branch: refs/heads/master Commit: 0e62d3dcb9e7945f140fc17fe8eca628579d5385 Parents: 4cb8767 Author: Zoltan Haindrich <k...@rxd.hu> Authored: Thu Mar 16 18:59:10 2017 +0100 Committer: Zoltan Haindrich <k...@rxd.hu> Committed: Thu Mar 16 19:42:01 2017 +0100 ---------------------------------------------------------------------- .../hadoop/hive/ql/exec/FunctionRegistry.java | 10 + .../generic/GenericUDAFBinarySetFunctions.java | 464 +++++++++++++++++++ .../TestGenericUDAFBinarySetFunctions.java | 416 +++++++++++++++++ .../clientpositive/udaf_binarysetfunctions.q | 57 +++ .../results/clientpositive/show_functions.q.out | 10 + .../udaf_binarysetfunctions.q.out | 464 +++++++++++++++++++ 6 files changed, 1421 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/0e62d3dc/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 4ac25c2..e3ace2a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -418,6 +418,16 @@ public final class FunctionRegistry { system.registerGenericUDAF("covar_pop", new GenericUDAFCovariance()); system.registerGenericUDAF("covar_samp", new GenericUDAFCovarianceSample()); system.registerGenericUDAF("corr", new GenericUDAFCorrelation()); + system.registerGenericUDAF("regr_slope", new 
GenericUDAFBinarySetFunctions.RegrSlope()); + system.registerGenericUDAF("regr_intercept", new GenericUDAFBinarySetFunctions.RegrIntercept()); + system.registerGenericUDAF("regr_r2", new GenericUDAFBinarySetFunctions.RegrR2()); + system.registerGenericUDAF("regr_sxx", new GenericUDAFBinarySetFunctions.RegrSXX()); + system.registerGenericUDAF("regr_syy", new GenericUDAFBinarySetFunctions.RegrSYY()); + system.registerGenericUDAF("regr_sxy", new GenericUDAFBinarySetFunctions.RegrSXY()); + system.registerGenericUDAF("regr_avgx", new GenericUDAFBinarySetFunctions.RegrAvgX()); + system.registerGenericUDAF("regr_avgy", new GenericUDAFBinarySetFunctions.RegrAvgY()); + system.registerGenericUDAF("regr_count", new GenericUDAFBinarySetFunctions.RegrCount()); + system.registerGenericUDAF("histogram_numeric", new GenericUDAFHistogramNumeric()); system.registerGenericUDAF("percentile_approx", new GenericUDAFPercentileApprox()); system.registerGenericUDAF("collect_set", new GenericUDAFCollectSet()); http://git-wip-us.apache.org/repos/asf/hive/blob/0e62d3dc/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBinarySetFunctions.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBinarySetFunctions.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBinarySetFunctions.java new file mode 100644 index 0000000..e799a94 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBinarySetFunctions.java @@ -0,0 +1,464 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage.GenericUDAFAverageEvaluatorDouble; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage.GenericUDAFAverageEvaluatorDecimal; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCorrelation.GenericUDAFCorrelationEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount.GenericUDAFCountEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVariance.GenericUDAFVarianceEvaluator; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +public class GenericUDAFBinarySetFunctions extends AbstractGenericUDAFResolver { + + @Description(name = "regr_count", value = "_FUNC_(y,x) - returns the number of non-null pairs", extended = "The function takes as arguments any pair of numeric types and returns a long.\n" + + "Any pair with a NULL is ignored.") + public static class RegrCount extends AbstractGenericUDAFResolver { + + @Override + public GenericUDAFEvaluator 
getEvaluator(TypeInfo[] parameters) throws SemanticException { + checkArgumentTypes(parameters); + return new Evaluator(); + } + + private static class Evaluator extends GenericUDAFCountEvaluator { + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + switch (m) { + case COMPLETE: + case PARTIAL1: + return super.init(m, new ObjectInspector[] { parameters[0] }); + default: + return super.init(m, parameters); + } + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + if (parameters[0] == null || parameters[1] == null) + return; + super.iterate(agg, new Object[] { parameters[0] }); + } + } + } + + @Description(name = "regr_sxx", value = "_FUNC_(y,x) - auxiliary analytic function", extended = "The function takes as arguments any pair of numeric types and returns a double.\n" + + "Any pair with a NULL is ignored.\n" + + "If applied to an empty set: NULL is returned.\n" + + "Otherwise, it computes the following:\n" + + " SUM(x*x)-SUM(x)*SUM(x)/N\n") + public static class RegrSXX extends AbstractGenericUDAFResolver { + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + checkArgumentTypes(parameters); + return new Evaluator(); + } + + private static class Evaluator extends GenericUDAFVarianceEvaluator { + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + switch (m) { + case COMPLETE: + case PARTIAL1: + return super.init(m, new ObjectInspector[] { parameters[1] }); + default: + return super.init(m, parameters); + } + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + if (parameters[0] == null || parameters[1] == null) + return; + super.iterate(agg, new Object[] { parameters[1] }); + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + StdAgg myagg = (StdAgg) agg; + if 
(myagg.count == 0) { + return null; + } else { + DoubleWritable result = getResult(); + result.set(myagg.variance); + return result; + } + } + } + } + + @Description(name = "regr_syy", value = "_FUNC_(y,x) - auxiliary analytic function", extended = "The function takes as arguments any pair of numeric types and returns a double.\n" + + "Any pair with a NULL is ignored.\n" + + "If applied to an empty set: NULL is returned.\n" + + "Otherwise, it computes the following:\n" + + " SUM(y*y)-SUM(y)*SUM(y)/N\n") + public static class RegrSYY extends AbstractGenericUDAFResolver { + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + checkArgumentTypes(parameters); + return new Evaluator(); + } + + private static class Evaluator extends GenericUDAFVarianceEvaluator { + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + switch (m) { + case COMPLETE: + case PARTIAL1: + return super.init(m, new ObjectInspector[] { parameters[0] }); + default: + return super.init(m, parameters); + } + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + if (parameters[0] == null || parameters[1] == null) + return; + super.iterate(agg, new Object[] { parameters[0] }); + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + StdAgg myagg = (StdAgg) agg; + if (myagg.count == 0) { + return null; + } else { + DoubleWritable result = getResult(); + result.set(myagg.variance); + return result; + } + } + } + } + + @Description(name = "regr_avgx", value = "_FUNC_(y,x) - evaluates the average of the independent variable", extended = "The function takes as arguments any pair of numeric types and returns a double.\n" + + "Any pair with a NULL is ignored.\n" + + "If applied to an empty set: NULL is returned.\n" + + "Otherwise, it computes the following:\n" + + " AVG(X)") + public static class RegrAvgX extends 
AbstractGenericUDAFResolver { + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + checkArgumentTypes(parameters); + if (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory() == PrimitiveCategory.DECIMAL) { + return new EvaluatorDecimal(); + } else { + return new EvaluatorDouble(); + } + } + + private static class EvaluatorDouble extends GenericUDAFAverageEvaluatorDouble { + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + switch (m) { + case COMPLETE: + case PARTIAL1: + return super.init(m, new ObjectInspector[] { parameters[1] }); + default: + return super.init(m, parameters); + } + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + if (parameters[0] == null || parameters[1] == null) + return; + super.iterate(agg, new Object[] { parameters[1] }); + } + } + + private static class EvaluatorDecimal extends GenericUDAFAverageEvaluatorDecimal { + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + switch (m) { + case COMPLETE: + case PARTIAL1: + return super.init(m, new ObjectInspector[] { parameters[1] }); + default: + return super.init(m, parameters); + } + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + if (parameters[0] == null || parameters[1] == null) + return; + super.iterate(agg, new Object[] { parameters[1] }); + } + } + } + + @Description(name = "regr_avgy", value = "_FUNC_(y,x) - evaluates the average of the dependent variable", extended = "The function takes as arguments any pair of numeric types and returns a double.\n" + + "Any pair with a NULL is ignored.\n" + + "If applied to an empty set: NULL is returned.\n" + + "Otherwise, it computes the following:\n" + + " AVG(Y)") + public static class RegrAvgY extends AbstractGenericUDAFResolver { + + @Override + public 
GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + checkArgumentTypes(parameters); + if (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory() == PrimitiveCategory.DECIMAL) { + return new EvaluatorDecimal(); + } else { + return new EvaluatorDouble(); + } + } + + private static class EvaluatorDouble extends GenericUDAFAverageEvaluatorDouble { + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + switch (m) { + case COMPLETE: + case PARTIAL1: + return super.init(m, new ObjectInspector[] { parameters[0] }); + default: + return super.init(m, parameters); + } + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + if (parameters[0] == null || parameters[1] == null) + return; + super.iterate(agg, new Object[] { parameters[0] }); + } + } + + private static class EvaluatorDecimal extends GenericUDAFAverageEvaluatorDecimal { + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + switch (m) { + case COMPLETE: + case PARTIAL1: + return super.init(m, new ObjectInspector[] { parameters[0] }); + default: + return super.init(m, parameters); + } + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + if (parameters[0] == null || parameters[1] == null) + return; + super.iterate(agg, new Object[] { parameters[0] }); + } + } + } + + @Description(name = "regr_slope", value = "_FUNC_(y,x) - returns the slope of the linear regression line", extended = "The function takes as arguments any pair of numeric types and returns a double.\n" + + "Any pair with a NULL is ignored.\n" + + "If applied to an empty set: NULL is returned.\n" + + "If N*SUM(x*x) = SUM(x)*SUM(x): NULL is returned (the fit would be a vertical).\n" + + "Otherwise, it computes the following:\n" + + " (N*SUM(x*y)-SUM(x)*SUM(y)) / (N*SUM(x*x)-SUM(x)*SUM(x))") + public static 
class RegrSlope extends AbstractGenericUDAFResolver { + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + checkArgumentTypes(parameters); + return new Evaluator(); + } + + /** + * NOTE: corr is declared as corr(x,y) instead corr(y,x) + */ + private static class Evaluator extends GenericUDAFCorrelationEvaluator { + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + StdAgg myagg = (StdAgg) agg; + + if (myagg.count < 2 || myagg.yvar == 0.0d) { + return null; + } else { + getResult().set(myagg.covar / myagg.yvar); + return getResult(); + } + } + } + } + + @Description(name = "regr_r2", value = "_FUNC_(y,x) - returns the coefficient of determination (also called R-squared or goodness of fit) for the regression line.", extended = "The function takes as arguments any pair of numeric types and returns a double.\n" + + "Any pair with a NULL is ignored.\n" + + "If applied to an empty set: NULL is returned.\n" + + "If N*SUM(x*x) = SUM(x)*SUM(x): NULL is returned.\n" + + "If N*SUM(y*y) = SUM(y)*SUM(y): 1 is returned.\n" + + "Otherwise, it computes the following:\n" + + " POWER( N*SUM(x*y)-SUM(x)*SUM(y) ,2) / ( (N*SUM(x*x)-SUM(x)*SUM(x)) * (N*SUM(y*y)-SUM(y)*SUM(y)) )") + public static class RegrR2 extends AbstractGenericUDAFResolver { + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + checkArgumentTypes(parameters); + return new Evaluator(); + } + + /** + * NOTE: corr is declared as corr(x,y) instead corr(y,x) + */ + private static class Evaluator extends GenericUDAFCorrelationEvaluator { + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + StdAgg myagg = (StdAgg) agg; + + if (myagg.count < 2 || myagg.yvar == 0.0d) { + return null; + } + DoubleWritable result = getResult(); + if (myagg.xvar == 0.0d) { + result.set(1.0d); + } else { + result.set(myagg.covar * myagg.covar / myagg.yvar / 
myagg.xvar); + } + return result; + } + } + } + + @Description(name = "regr_sxy", value = "_FUNC_(y,x) - return a value that can be used to evaluate the statistical validity of a regression model.", extended = "The function takes as arguments any pair of numeric types and returns a double.\n" + + "Any pair with a NULL is ignored.\n" + + "If applied to an empty set: NULL is returned.\n" + + "If N*SUM(x*x) = SUM(x)*SUM(x): NULL is returned.\n" + + "Otherwise, it computes the following:\n" + + " SUM(x*y)-SUM(x)*SUM(y)/N") + public static class RegrSXY extends AbstractGenericUDAFResolver { + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + checkArgumentTypes(parameters); + return new Evaluator(); + } + + /** + * NOTE: corr is declared as corr(x,y) instead corr(y,x) + */ + private static class Evaluator extends GenericUDAFCorrelationEvaluator { + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + StdAgg myagg = (StdAgg) agg; + + if (myagg.count == 0) { + return null; + } + DoubleWritable result = getResult(); + result.set(myagg.covar); + return result; + } + } + } + + @Description(name = "regr_intercept", value = "_FUNC_(y,x) - returns the y-intercept of the regression line.", extended = "The function takes as arguments any pair of numeric types and returns a double.\n" + + "Any pair with a NULL is ignored.\n" + + "If applied to an empty set: NULL is returned.\n" + + "If N*SUM(x*x) = SUM(x)*SUM(x): NULL is returned.\n" + + "Otherwise, it computes the following:\n" + + " ( SUM(y)*SUM(x*x)-SUM(X)*SUM(x*y) ) / ( N*SUM(x*x)-SUM(x)*SUM(x) )") + public static class RegrIntercept extends AbstractGenericUDAFResolver { + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + checkArgumentTypes(parameters); + return new Evaluator(); + } + + /** + * NOTE: corr is declared as corr(x,y) instead corr(y,x) + */ + private static class 
Evaluator extends GenericUDAFCorrelationEvaluator { + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + StdAgg myagg = (StdAgg) agg; + + if (myagg.count == 0) { + return null; + } + DoubleWritable result = getResult(); + double slope = myagg.covar / myagg.yvar; + result.set(myagg.xavg - slope * myagg.yavg); + return result; + } + } + } + + private static void checkArgumentTypes(TypeInfo[] parameters) throws UDFArgumentTypeException { + if (parameters.length != 2) { + throw new UDFArgumentTypeException(parameters.length - 1, + "Exactly two arguments are expected."); + } + + if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); + } + + if (parameters[1].getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentTypeException(1, "Only primitive type arguments are accepted but " + + parameters[1].getTypeName() + " is passed."); + } + + if (!acceptedPrimitiveCategory(((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory())) { + throw new UDFArgumentTypeException(0, "Only numeric type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); + + } + if (!acceptedPrimitiveCategory(((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory())) { + throw new UDFArgumentTypeException(1, "Only numeric type arguments are accepted but " + + parameters[1].getTypeName() + " is passed."); + } + } + + private static boolean acceptedPrimitiveCategory(PrimitiveCategory primitiveCategory) { + switch (primitiveCategory) { + case BYTE: + case SHORT: + case INT: + case LONG: + case FLOAT: + case DOUBLE: + case TIMESTAMP: + case DECIMAL: + return true; + default: + return false; + } + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/0e62d3dc/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFBinarySetFunctions.java 
---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFBinarySetFunctions.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFBinarySetFunctions.java new file mode 100644 index 0000000..584caf1 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFBinarySetFunctions.java @@ -0,0 +1,416 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

package org.apache.hadoop.hive.ql.udf.generic;

import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.io.LongWritable;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import jersey.repackaged.com.google.common.collect.Lists;

/**
 * Parameterized tests for the regr_* binary set UDAFs.  Each parameter set is
 * a small (y, x) row set; every UDAF is executed through three aggregation
 * plans (COMPLETE, PARTIAL1+FINAL, PARTIAL1+PARTIAL2+FINAL) and each result
 * is compared against an independently computed reference value
 * ({@link RegrIntermediate}).
 */
@RunWith(Parameterized.class)
public class TestGenericUDAFBinarySetFunctions {

  // current (y, x) row set under test, supplied by the runner
  private List<Object[]> rowSet;

  @Parameters(name = "{0}")
  public static List<Object[]> getParameters() {
    List<Object[]> ret = new ArrayList<>();
    // label + generated rows; labels describe the y/x column shapes
    ret.add(new Object[] { "seq/seq", RowSetGenerator.generate(10,
        new RowSetGenerator.DoubleSequence(0), new RowSetGenerator.DoubleSequence(0)) });
    ret.add(new Object[] { "seq/ones", RowSetGenerator.generate(10,
        new RowSetGenerator.DoubleSequence(0), new RowSetGenerator.ConstantSequence(1.0)) });
    ret.add(new Object[] { "ones/seq", RowSetGenerator.generate(10,
        new RowSetGenerator.ConstantSequence(1.0), new RowSetGenerator.DoubleSequence(0)) });
    ret.add(new Object[] { "empty", RowSetGenerator.generate(0,
        new RowSetGenerator.DoubleSequence(0), new RowSetGenerator.DoubleSequence(0)) });
    ret.add(new Object[] { "lonely", RowSetGenerator.generate(1,
        new RowSetGenerator.DoubleSequence(0), new RowSetGenerator.DoubleSequence(0)) });
    ret.add(new Object[] { "seq/seq+10", RowSetGenerator.generate(10,
        new RowSetGenerator.DoubleSequence(0), new RowSetGenerator.DoubleSequence(10)) });
    ret.add(new Object[] { "seq/null", RowSetGenerator.generate(10,
        new RowSetGenerator.DoubleSequence(0), new RowSetGenerator.ConstantSequence(null)) });
    ret.add(new Object[] { "null/seq0", RowSetGenerator.generate(10,
        new RowSetGenerator.ConstantSequence(null), new RowSetGenerator.DoubleSequence(0)) });
    return ret;
  }

  /**
   * Drives a UDAF through the three distinct aggregation plans Hive can use
   * and returns one result per plan so callers can verify they all agree.
   */
  public static class GenericUDAFExecutor {

    private GenericUDAFResolver2 evaluatorFactory;
    private GenericUDAFParameterInfo info;
    private ObjectInspector[] partialOIs;

    public GenericUDAFExecutor(GenericUDAFResolver2 evaluatorFactory, GenericUDAFParameterInfo info)
        throws Exception {
      this.evaluatorFactory = evaluatorFactory;
      this.info = info;

      // pre-compute the partial-result object inspector used by merge phases
      GenericUDAFEvaluator eval0 = evaluatorFactory.getEvaluator(info);
      partialOIs = new ObjectInspector[] {
          eval0.init(GenericUDAFEvaluator.Mode.PARTIAL1, info.getParameterObjectInspectors()) };

    }

    List<Object> run(List<Object[]> values) throws Exception {
      Object r1 = runComplete(values);
      Object r2 = runPartialFinal(values);
      Object r3 = runPartial2Final(values);
      return Lists.newArrayList(r1, r2, r3);
    }

    // plan 1: single COMPLETE evaluator over all rows
    private Object runComplete(List<Object[]> values) throws SemanticException, HiveException {
      GenericUDAFEvaluator eval = evaluatorFactory.getEvaluator(info);
      eval.init(GenericUDAFEvaluator.Mode.COMPLETE, info.getParameterObjectInspectors());
      AggregationBuffer agg = eval.getNewAggregationBuffer();
      for (Object[] parameters : values) {
        eval.iterate(agg, parameters);
      }
      return eval.terminate(agg);
    }

    // plan 2: PARTIAL1 partials merged by a FINAL evaluator
    private Object runPartialFinal(List<Object[]> values) throws Exception {
      GenericUDAFEvaluator eval = evaluatorFactory.getEvaluator(info);
      eval.init(GenericUDAFEvaluator.Mode.FINAL, partialOIs);
      AggregationBuffer buf = eval.getNewAggregationBuffer();
      for (Object partialResult : runPartial1(values)) {
        eval.merge(buf, partialResult);
      }
      return eval.terminate(buf);
    }

    // plan 3: PARTIAL1 partials re-merged by PARTIAL2, then FINAL
    private Object runPartial2Final(List<Object[]> values) throws Exception {
      GenericUDAFEvaluator eval = evaluatorFactory.getEvaluator(info);
      eval.init(GenericUDAFEvaluator.Mode.FINAL, partialOIs);
      AggregationBuffer buf = eval.getNewAggregationBuffer();
      for (Object partialResult : runPartial2(runPartial1(values))) {
        eval.merge(buf, partialResult);
      }
      return eval.terminate(buf);
    }

    // splits rows into batches of doubling size (0,1,3,7,... rows consumed)
    // and emits one partial result per batch
    private List<Object> runPartial1(List<Object[]> values) throws Exception {
      List<Object> ret = new ArrayList<>();
      int batchSize = 1;
      Iterator<Object[]> iter = values.iterator();
      do {
        GenericUDAFEvaluator eval = evaluatorFactory.getEvaluator(info);
        eval.init(GenericUDAFEvaluator.Mode.PARTIAL1, info.getParameterObjectInspectors());
        AggregationBuffer buf = eval.getNewAggregationBuffer();
        for (int i = 0; i < batchSize - 1 && iter.hasNext(); i++) {
          eval.iterate(buf, iter.next());
        }
        batchSize <<= 1;
        ret.add(eval.terminatePartial(buf));

        // back-check to force at least 1 output; and this should have a partial which is empty
      } while (iter.hasNext());
      return ret;
    }

    // same doubling-batch scheme, but merging partials via PARTIAL2
    private List<Object> runPartial2(List<Object> values) throws Exception {
      List<Object> ret = new ArrayList<>();
      int batchSize = 1;
      Iterator<Object> iter = values.iterator();
      do {
        GenericUDAFEvaluator eval = evaluatorFactory.getEvaluator(info);
        eval.init(GenericUDAFEvaluator.Mode.PARTIAL2, partialOIs);
        AggregationBuffer buf = eval.getNewAggregationBuffer();
        for (int i = 0; i < batchSize - 1 && iter.hasNext(); i++) {
          eval.merge(buf, iter.next());
        }
        batchSize <<= 1;
        ret.add(eval.terminatePartial(buf));

        // back-check to force at least 1 output; and this should have a partial which is empty
      } while (iter.hasNext());
      return ret;
    }
  }

  /** Builds row sets column-by-column from per-field generators. */
  public static class RowSetGenerator {
    public static interface FieldGenerator {
      public Object apply(int rowIndex);
    }

    /** Emits the same value (possibly null) for every row. */
    public static class ConstantSequence implements FieldGenerator {
      private Object constant;

      public ConstantSequence(Object constant) {
        this.constant = constant;
      }

      @Override
      public Object apply(int rowIndex) {
        return constant;
      }
    }

    /** Emits rowIndex + offset as a double. */
    public static class DoubleSequence implements FieldGenerator {

      private int offset;

      public DoubleSequence(int offset) {
        this.offset = offset;
      }

      @Override
      public Object apply(int rowIndex) {
        double d = rowIndex + offset;
        return d;
      }
    }

    public static List<Object[]> generate(int numRows, FieldGenerator... generators) {
      ArrayList<Object[]> ret = new ArrayList<>(numRows);
      for (int rowIdx = 0; rowIdx < numRows; rowIdx++) {
        ArrayList<Object> row = new ArrayList<>();
        for (FieldGenerator g : generators) {
          row.add(g.apply(rowIdx));
        }
        ret.add(row.toArray());
      }
      return ret;
    }
  }

  public TestGenericUDAFBinarySetFunctions(String label, List<Object[]> rowSet) {
    this.rowSet = rowSet;
  }

  @Test
  public void regr_count() throws Exception {
    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
    validateUDAF(expected.count(), new GenericUDAFBinarySetFunctions.RegrCount());
  }

  @Test
  public void regr_sxx() throws Exception {
    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
    validateUDAF(expected.sxx(), new GenericUDAFBinarySetFunctions.RegrSXX());
  }

  @Test
  public void regr_syy() throws Exception {
    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
    validateUDAF(expected.syy(), new GenericUDAFBinarySetFunctions.RegrSYY());
  }

  @Test
  public void regr_sxy() throws Exception {
    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
    validateUDAF(expected.sxy(), new GenericUDAFBinarySetFunctions.RegrSXY());
  }

  @Test
  public void regr_avgx() throws Exception {
    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
    validateUDAF(expected.avgx(), new GenericUDAFBinarySetFunctions.RegrAvgX());
  }

  @Test
  public void regr_avgy() throws Exception {
    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
    validateUDAF(expected.avgy(), new GenericUDAFBinarySetFunctions.RegrAvgY());
  }

  @Test
  public void regr_slope() throws Exception {
    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
    validateUDAF(expected.slope(), new GenericUDAFBinarySetFunctions.RegrSlope());
  }

  @Test
  public void regr_r2() throws Exception {
    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
    validateUDAF(expected.r2(), new GenericUDAFBinarySetFunctions.RegrR2());
  }

  @Test
  public void regr_intercept() throws Exception {
    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
    validateUDAF(expected.intercept(), new GenericUDAFBinarySetFunctions.RegrIntercept());
  }

  @Test
  @Ignore("HIVE-16178 should fix this")
  public void corr() throws Exception {
    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
    validateUDAF(expected.corr(), new GenericUDAFCorrelation());
  }

  @Test
  public void covar_pop() throws Exception {
    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
    validateUDAF(expected.covar_pop(), new GenericUDAFCovariance());
  }

  @Test
  @Ignore("HIVE-16178 should fix this")
  public void covar_samp() throws Exception {
    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
    validateUDAF(expected.covar_samp(), new GenericUDAFCovarianceSample());
  }

  // runs the UDAF through all three plans and compares every result against
  // the reference value (or asserts NULL for all plans)
  private void validateUDAF(Double expectedResult, GenericUDAFResolver2 udaf) throws Exception {
    ObjectInspector[] params =
        new ObjectInspector[] { javaDoubleObjectInspector, javaDoubleObjectInspector };
    GenericUDAFParameterInfo gpi = new SimpleGenericUDAFParameterInfo(params, false, false, false);
    GenericUDAFExecutor executor = new GenericUDAFExecutor(udaf, gpi);

    List<Object> values = executor.run(rowSet);

    if (expectedResult == null) {
      for (Object v : values) {
        assertNull(v);
      }
    } else {
      for (Object v : values) {
        if (v instanceof DoubleWritable) {
          assertEquals(expectedResult, ((DoubleWritable) v).get(), 1e-10);
        } else {
          assertEquals(expectedResult, ((LongWritable) v).get(), 1e-10);
        }
      }
    }
  }

  /**
   * Straightforward single-pass reference implementation of the regression
   * statistics, computed from raw sums; null in either column skips the row.
   */
  static class RegrIntermediate {
    public double sum_x2, sum_y2;
    public double sum_x, sum_y;
    public double sum_xy;
    public double n;

    public void add(Double y, Double x) {
      if (x == null || y == null) {
        return;
      }
      sum_x2 += x * x;
      sum_y2 += y * y;
      sum_x += x;
      sum_y += y;
      sum_xy += x * y;
      n++;
    }

    public Double intercept() {
      double xx = n * sum_x2 - sum_x * sum_x;
      // NOTE(review): does not guard xx == 0 (vertical fit) -- mirrors the
      // production evaluator's behavior at the time of writing
      if (n == 0)
        return null;
      return (sum_y * sum_x2 - sum_x * sum_xy) / xx;
    }

    public Double sxy() {
      if (n == 0)
        return null;
      return sum_xy - sum_x * sum_y / n;
    }

    public Double covar_pop() {
      if (n == 0)
        return null;
      return (sum_xy - sum_x * sum_y / n) / n;
    }

    public Double covar_samp() {
      if (n <= 1)
        return null;
      return (sum_xy - sum_x * sum_y / n) / (n - 1);
    }

    public Double corr() {
      double xx = n * sum_x2 - sum_x * sum_x;
      double yy = n * sum_y2 - sum_y * sum_y;
      if (n == 0 || xx == 0.0d || yy == 0.0d)
        return null;
      double c = n * sum_xy - sum_x * sum_y;
      return Math.sqrt(c * c / xx / yy);
    }

    public Double r2() {
      double xx = n * sum_x2 - sum_x * sum_x;
      double yy = n * sum_y2 - sum_y * sum_y;
      if (n == 0 || xx == 0.0d)
        return null;
      if (yy == 0.0d)
        return 1.0d;
      double c = n * sum_xy - sum_x * sum_y;
      return c * c / xx / yy;
    }

    public Double slope() {
      if (n == 0 || n * sum_x2 == sum_x * sum_x)
        return null;
      return (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x * sum_x);
    }

    public Double avgx() {
      if (n == 0)
        return null;
      return sum_x / n;
    }

    public Double avgy() {
      if (n == 0)
        return null;
      return sum_y / n;
    }

    public Double count() {
      return n;
    }

    public Double sxx() {
      if (n == 0)
        return null;
      return sum_x2 - sum_x * sum_x / n;
    }

    public Double syy() {
      if (n == 0)
        return null;
      return sum_y2 - sum_y * sum_y / n;
    }

    public static RegrIntermediate computeFor(List<Object[]> rows) {
      RegrIntermediate ri = new RegrIntermediate();
      for (Object[] objects : rows) {
        ri.add((Double) objects[0], (Double) objects[1]);
      }
      return ri;
    }

  }
}

http://git-wip-us.apache.org/repos/asf/hive/blob/0e62d3dc/ql/src/test/queries/clientpositive/udaf_binarysetfunctions.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udaf_binarysetfunctions.q b/ql/src/test/queries/clientpositive/udaf_binarysetfunctions.q
new file mode 100644
index 0000000..2039312
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udaf_binarysetfunctions.q
@@ -0,0 +1,57 @@
create table t (id int,px int,y decimal,x decimal);

insert into t values (101,1,1,1);
insert into t values (201,2,1,1);
insert into t values (301,3,1,1);
insert into t values (401,4,1,11);
insert into t values (501,5,1,null);
insert into t values (601,6,null,1);
insert into t values (701,6,null,null);
insert into t values (102,1,2,2);
insert into t values (202,2,1,2);
insert into t values (302,3,2,1);
insert into t values (402,4,2,12);
insert into t values (502,5,2,null);
insert into t values (602,6,null,2);
insert into t values (702,6,null,null);
insert into t values (103,1,3,3);
insert into t values (203,2,1,3);
insert into t values (303,3,3,1);
insert into t values (403,4,3,13);
insert into t values (503,5,3,null);
insert into t values (603,6,null,3);
insert into t values (703,6,null,null);
insert into t values (104,1,4,4);
insert into t values (204,2,1,4);
insert into t values (304,3,4,1);
insert into t values (404,4,4,14);
insert into t values (504,5,4,null);
insert into t values (604,6,null,4);
insert into t values (704,6,null,null);
insert into t values (800,7,1,1);


explain select
px,var_pop(x),var_pop(y),corr(y,x),covar_samp(y,x),covar_pop(y,x),regr_count(y,x),regr_slope(y,x), +regr_intercept(y,x), regr_r2(y,x), regr_sxx(y,x), regr_syy(y,x), regr_sxy(y,x), regr_avgx(y,x), regr_avgy(y,x), regr_count(y,x) + from t group by px order by px; + +select px, + round( var_pop(x),5), + round( var_pop(y),5), + round( corr(y,x),5), + round( covar_samp(y,x),5), + round( covar_pop(y,x),5), + regr_count(y,x), + round( regr_slope(y,x),5), + round( regr_intercept(y,x),5), + round( regr_r2(y,x),5), + round( regr_sxx(y,x),5), + round( regr_syy(y,x),5), + round( regr_sxy(y,x),5), + round( regr_avgx(y,x),5), + round( regr_avgy(y,x),5), + round( regr_count(y,x),5) + from t group by px order by px; + + +select id,regr_count(y,x) over (partition by px) from t order by id; http://git-wip-us.apache.org/repos/asf/hive/blob/0e62d3dc/ql/src/test/results/clientpositive/show_functions.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out index 1361b93..be58381 100644 --- a/ql/src/test/results/clientpositive/show_functions.q.out +++ b/ql/src/test/results/clientpositive/show_functions.q.out @@ -191,6 +191,15 @@ reflect2 regexp regexp_extract regexp_replace +regr_avgx +regr_avgy +regr_count +regr_intercept +regr_r2 +regr_slope +regr_sxx +regr_sxy +regr_syy repeat replace replicate_rows @@ -322,6 +331,7 @@ percentile posexplode positive regexp_replace +regr_slope replace reverse rlike http://git-wip-us.apache.org/repos/asf/hive/blob/0e62d3dc/ql/src/test/results/clientpositive/udaf_binarysetfunctions.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/udaf_binarysetfunctions.q.out b/ql/src/test/results/clientpositive/udaf_binarysetfunctions.q.out new file mode 100644 index 0000000..9de3dd9 --- /dev/null +++ 
b/ql/src/test/results/clientpositive/udaf_binarysetfunctions.q.out @@ -0,0 +1,464 @@ +PREHOOK: query: create table t (id int,px int,y decimal,x decimal) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t (id int,px int,y decimal,x decimal) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into t values (101,1,1,1) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (101,1,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (201,2,1,1) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (201,2,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (301,3,1,1) 
+PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (301,3,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (401,4,1,11) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (401,4,1,11) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (501,5,1,null) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (501,5,1,null) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: 
Lineage: t.x EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (601,6,null,1) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (601,6,null,1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (701,6,null,null) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (701,6,null,null) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (102,1,2,2) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (102,1,2,2) +POSTHOOK: type: 
QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (202,2,1,2) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (202,2,1,2) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (302,3,2,1) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (302,3,2,1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col4, type:string, 
comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (402,4,2,12) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (402,4,2,12) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (502,5,2,null) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (502,5,2,null) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (602,6,null,2) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (602,6,null,2) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (702,6,null,null) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (702,6,null,null) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (103,1,3,3) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (103,1,3,3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (203,2,1,3) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (203,2,1,3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (303,3,3,1) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (303,3,3,1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (403,4,3,13) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (403,4,3,13) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col1, type:string, 
comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (503,5,3,null) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (503,5,3,null) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (603,6,null,3) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (603,6,null,3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__20)values__tmp__table__20.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__20)values__tmp__table__20.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__20)values__tmp__table__20.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__20)values__tmp__table__20.FieldSchema(name:tmp_values_col3, type:string, 
comment:), ] +PREHOOK: query: insert into t values (703,6,null,null) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (703,6,null,null) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__21)values__tmp__table__21.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__21)values__tmp__table__21.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__21)values__tmp__table__21.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__21)values__tmp__table__21.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (104,1,4,4) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (104,1,4,4) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__22)values__tmp__table__22.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__22)values__tmp__table__22.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__22)values__tmp__table__22.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__22)values__tmp__table__22.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (204,2,1,4) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (204,2,1,4) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__23)values__tmp__table__23.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__23)values__tmp__table__23.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__23)values__tmp__table__23.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__23)values__tmp__table__23.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (304,3,4,1) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (304,3,4,1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__24)values__tmp__table__24.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__24)values__tmp__table__24.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__24)values__tmp__table__24.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__24)values__tmp__table__24.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (404,4,4,14) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (404,4,4,14) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__25)values__tmp__table__25.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__25)values__tmp__table__25.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__25)values__tmp__table__25.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__25)values__tmp__table__25.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values 
(504,5,4,null) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (504,5,4,null) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__26)values__tmp__table__26.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__26)values__tmp__table__26.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__26)values__tmp__table__26.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__26)values__tmp__table__26.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (604,6,null,4) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (604,6,null,4) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__27)values__tmp__table__27.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__27)values__tmp__table__27.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__27)values__tmp__table__27.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__27)values__tmp__table__27.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (704,6,null,null) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (704,6,null,null) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__28)values__tmp__table__28.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__28)values__tmp__table__28.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__28)values__tmp__table__28.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__28)values__tmp__table__28.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into t values (800,7,1,1) +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (800,7,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id EXPRESSION [(values__tmp__table__29)values__tmp__table__29.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.px EXPRESSION [(values__tmp__table__29)values__tmp__table__29.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__29)values__tmp__table__29.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: t.y EXPRESSION [(values__tmp__table__29)values__tmp__table__29.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: explain select px,var_pop(x),var_pop(y),corr(y,x),covar_samp(y,x),covar_pop(y,x),regr_count(y,x),regr_slope(y,x), +regr_intercept(y,x), regr_r2(y,x), regr_sxx(y,x), regr_syy(y,x), regr_sxy(y,x), regr_avgx(y,x), regr_avgy(y,x), regr_count(y,x) + from t group by px order by px +PREHOOK: type: QUERY +POSTHOOK: query: explain select px,var_pop(x),var_pop(y),corr(y,x),covar_samp(y,x),covar_pop(y,x),regr_count(y,x),regr_slope(y,x), +regr_intercept(y,x), regr_r2(y,x), regr_sxx(y,x), regr_syy(y,x), regr_sxy(y,x), regr_avgx(y,x), regr_avgy(y,x), regr_count(y,x) + from t group by px order by px +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + 
TableScan + alias: t + Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: px (type: int), x (type: decimal(10,0)), y (type: decimal(10,0)) + outputColumnNames: px, x, y + Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: var_pop(x), var_pop(y), corr(y, x), covar_samp(y, x), covar_pop(y, x), regr_count(y, x), regr_slope(y, x), regr_intercept(y, x), regr_r2(y, x), regr_sxx(y, x), regr_syy(y, x), regr_sxy(y, x), regr_avgx(y, x), regr_avgy(y, x) + keys: px (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,xavg:double,yavg:double,xvar:double,yvar:double,covar:double>), _col4 (type: struct<count:bigint,xavg:double,yavg:double,covar:double>), _col5 (type: struct<count:bigint,xavg:double,yavg:double,covar:double>), _col6 (type: bigint), _col7 (type: struct<count:bigint,xavg:double,yavg:double,xvar:double,yvar:double,covar:double>), _col8 (type: struct<count:bigint,xavg:double,yavg:double,xvar:double,yvar:double,covar:double>), _col9 (type: struct<count:bigint,xavg:double,yavg:double,xvar:double,yvar:double,covar:double>), _col10 (type: struct<count:bigint,sum:double,variance:double>), _col11 (type: struct<count:bigint,sum:double,variance:double>), _col12 (type: struct<count:bigint,xavg:double,yavg:double,xvar:double,yvar:double,covar:double>), _col13 (type: 
struct<count:bigint,sum:decimal(20,0), input:decimal(10,0)>), _col14 (type: struct<count:bigint,sum:decimal(20,0),input:decimal(10,0)>) + Reduce Operator Tree: + Group By Operator + aggregations: var_pop(VALUE._col0), var_pop(VALUE._col1), corr(VALUE._col2), covar_samp(VALUE._col3), covar_pop(VALUE._col4), regr_count(VALUE._col5), regr_slope(VALUE._col6), regr_intercept(VALUE._col7), regr_r2(VALUE._col8), regr_sxx(VALUE._col9), regr_syy(VALUE._col10), regr_sxy(VALUE._col11), regr_avgx(VALUE._col12), regr_avgy(VALUE._col13) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(14,4)), _col14 (type: decimal(14,4)), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: double), _col8 (type: double), _col9 (type: double) + outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), 
_col6 (type: bigint), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(14,4)), _col14 (type: decimal(14,4)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col11 (type: double), VALUE._col12 (type: decimal(14,4)), VALUE._col13 (type: decimal(14,4)), VALUE._col5 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select px, + round( var_pop(x),5), + round( var_pop(y),5), + round( corr(y,x),5), + round( covar_samp(y,x),5), + round( covar_pop(y,x),5), + regr_count(y,x), + round( regr_slope(y,x),5), + round( regr_intercept(y,x),5), + round( regr_r2(y,x),5), + round( regr_sxx(y,x),5), + round( regr_syy(y,x),5), + round( regr_sxy(y,x),5), + round( regr_avgx(y,x),5), + round( regr_avgy(y,x),5), + round( regr_count(y,x),5) + from t group by px order by px +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select px, + round( var_pop(x),5), + round( 
var_pop(y),5), + round( corr(y,x),5), + round( covar_samp(y,x),5), + round( covar_pop(y,x),5), + regr_count(y,x), + round( regr_slope(y,x),5), + round( regr_intercept(y,x),5), + round( regr_r2(y,x),5), + round( regr_sxx(y,x),5), + round( regr_syy(y,x),5), + round( regr_sxy(y,x),5), + round( regr_avgx(y,x),5), + round( regr_avgy(y,x),5), + round( regr_count(y,x),5) + from t group by px order by px +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +1 1.25 1.25 1.0 1.66667 1.25 4 1.0 0.0 1.0 5.0 5.0 5.0 2.50000 2.50000 4 +2 1.25 0.0 NaN 0.0 0.0 4 0.0 1.0 1.0 5.0 0.0 0.0 2.50000 1.00000 4 +3 0.0 1.25 NaN 0.0 0.0 4 NULL NaN NULL 0.0 5.0 0.0 1.00000 2.50000 4 +4 1.25 1.25 1.0 1.66667 1.25 4 1.0 -10.0 1.0 5.0 5.0 5.0 12.50000 2.50000 4 +5 NULL 1.25 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL 0 +6 1.25 NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL 0 +7 0.0 0.0 NULL 0.0 0.0 1 NULL NaN NULL 0.0 0.0 0.0 1.00000 1.00000 1 +PREHOOK: query: select id,regr_count(y,x) over (partition by px) from t order by id +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select id,regr_count(y,x) over (partition by px) from t order by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +101 4 +102 4 +103 4 +104 4 +201 4 +202 4 +203 4 +204 4 +301 4 +302 4 +303 4 +304 4 +401 4 +402 4 +403 4 +404 4 +501 0 +502 0 +503 0 +504 0 +601 0 +602 0 +603 0 +604 0 +701 0 +702 0 +703 0 +704 0 +800 1