Repository: hive
Updated Branches:
  refs/heads/master 4cb87670e -> 0e62d3dcb


HIVE-15978: Support regr_* functions (Zoltan Haindrich, reviewed by Ashutosh 
Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0e62d3dc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0e62d3dc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0e62d3dc

Branch: refs/heads/master
Commit: 0e62d3dcb9e7945f140fc17fe8eca628579d5385
Parents: 4cb8767
Author: Zoltan Haindrich <k...@rxd.hu>
Authored: Thu Mar 16 18:59:10 2017 +0100
Committer: Zoltan Haindrich <k...@rxd.hu>
Committed: Thu Mar 16 19:42:01 2017 +0100

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |  10 +
 .../generic/GenericUDAFBinarySetFunctions.java  | 464 +++++++++++++++++++
 .../TestGenericUDAFBinarySetFunctions.java      | 416 +++++++++++++++++
 .../clientpositive/udaf_binarysetfunctions.q    |  57 +++
 .../results/clientpositive/show_functions.q.out |  10 +
 .../udaf_binarysetfunctions.q.out               | 464 +++++++++++++++++++
 6 files changed, 1421 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/0e62d3dc/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 4ac25c2..e3ace2a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -418,6 +418,16 @@ public final class FunctionRegistry {
     system.registerGenericUDAF("covar_pop", new GenericUDAFCovariance());
     system.registerGenericUDAF("covar_samp", new 
GenericUDAFCovarianceSample());
     system.registerGenericUDAF("corr", new GenericUDAFCorrelation());
+    system.registerGenericUDAF("regr_slope", new 
GenericUDAFBinarySetFunctions.RegrSlope());
+    system.registerGenericUDAF("regr_intercept", new 
GenericUDAFBinarySetFunctions.RegrIntercept());
+    system.registerGenericUDAF("regr_r2", new 
GenericUDAFBinarySetFunctions.RegrR2());
+    system.registerGenericUDAF("regr_sxx", new 
GenericUDAFBinarySetFunctions.RegrSXX());
+    system.registerGenericUDAF("regr_syy", new 
GenericUDAFBinarySetFunctions.RegrSYY());
+    system.registerGenericUDAF("regr_sxy", new 
GenericUDAFBinarySetFunctions.RegrSXY());
+    system.registerGenericUDAF("regr_avgx", new 
GenericUDAFBinarySetFunctions.RegrAvgX());
+    system.registerGenericUDAF("regr_avgy", new 
GenericUDAFBinarySetFunctions.RegrAvgY());
+    system.registerGenericUDAF("regr_count", new 
GenericUDAFBinarySetFunctions.RegrCount());
+
     system.registerGenericUDAF("histogram_numeric", new 
GenericUDAFHistogramNumeric());
     system.registerGenericUDAF("percentile_approx", new 
GenericUDAFPercentileApprox());
     system.registerGenericUDAF("collect_set", new GenericUDAFCollectSet());

http://git-wip-us.apache.org/repos/asf/hive/blob/0e62d3dc/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBinarySetFunctions.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBinarySetFunctions.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBinarySetFunctions.java
new file mode 100644
index 0000000..e799a94
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBinarySetFunctions.java
@@ -0,0 +1,464 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage.GenericUDAFAverageEvaluatorDouble;
+import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage.GenericUDAFAverageEvaluatorDecimal;
+import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCorrelation.GenericUDAFCorrelationEvaluator;
+import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount.GenericUDAFCountEvaluator;
+import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVariance.GenericUDAFVarianceEvaluator;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+public class GenericUDAFBinarySetFunctions extends AbstractGenericUDAFResolver 
{
+
+  @Description(name = "regr_count", value = "_FUNC_(y,x) - returns the number 
of non-null pairs", extended = "The function takes as arguments any pair of 
numeric types and returns a long.\n"
+      + "Any pair with a NULL is ignored.")
+  public static class RegrCount extends AbstractGenericUDAFResolver { // resolver behind regr_count(y,x)
+
+    @Override
+    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws 
SemanticException {
+      checkArgumentTypes(parameters); // throws unless exactly two accepted primitive arguments are given
+      return new Evaluator();
+    }
+
+    private static class Evaluator extends GenericUDAFCountEvaluator { // count evaluator fed one value per complete pair
+
+      @Override
+      public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws 
HiveException {
+        switch (m) {
+        case COMPLETE: // in these two modes both argument inspectors are indexed, so init expects (y, x)
+        case PARTIAL1:
+          return super.init(m, new ObjectInspector[] { parameters[0] }); // parent only sees the first argument
+        default:
+          return super.init(m, parameters); // remaining modes: inspectors are handed to the parent untouched
+        }
+      }
+
+      @Override
+      public void iterate(AggregationBuffer agg, Object[] parameters) throws 
HiveException {
+        if (parameters[0] == null || parameters[1] == null)
+          return; // a pair with a NULL on either side is ignored
+        super.iterate(agg, new Object[] { parameters[0] }); // forward only the first (non-null) value to the parent
+      }
+    }
+  }
+
+  @Description(name = "regr_sxx", value = "_FUNC_(y,x) - auxiliary analytic 
function", extended = "The function takes as arguments any pair of numeric 
types and returns a double.\n"
+      + "Any pair with a NULL is ignored.\n"
+      + "If applied to an empty set: NULL is returned.\n"
+      + "Otherwise, it computes the following:\n"
+      + "   SUM(x*x)-SUM(x)*SUM(x)/N\n")
+  public static class RegrSXX extends AbstractGenericUDAFResolver { // resolver behind regr_sxx(y,x)
+
+    @Override
+    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws 
SemanticException {
+      checkArgumentTypes(parameters); // throws unless exactly two accepted primitive arguments are given
+      return new Evaluator();
+    }
+
+    private static class Evaluator extends GenericUDAFVarianceEvaluator { // variance evaluator fed with x only
+
+      @Override
+      public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws 
HiveException {
+        switch (m) {
+        case COMPLETE: // in these two modes both argument inspectors are indexed, so init expects (y, x)
+        case PARTIAL1:
+          return super.init(m, new ObjectInspector[] { parameters[1] }); // parent only sees the second argument (x)
+        default:
+          return super.init(m, parameters); // remaining modes: inspectors are handed to the parent untouched
+        }
+      }
+
+      @Override
+      public void iterate(AggregationBuffer agg, Object[] parameters) throws 
HiveException {
+        if (parameters[0] == null || parameters[1] == null)
+          return; // a pair with a NULL on either side is ignored
+        super.iterate(agg, new Object[] { parameters[1] }); // aggregate the x value of the pair
+      }
+
+      @Override
+      public Object terminate(AggregationBuffer agg) throws HiveException {
+        StdAgg myagg = (StdAgg) agg;
+        if (myagg.count == 0) {
+          return null; // no complete pair was seen: result is NULL
+        } else {
+          DoubleWritable result = getResult();
+          result.set(myagg.variance); // buffer value returned as-is, no division by count here (presumably the sum of squared deviations - confirm in the parent)
+          return result;
+        }
+      }
+    }
+  }
+
+  @Description(name = "regr_syy", value = "_FUNC_(y,x) - auxiliary analytic 
function", extended = "The function takes as arguments any pair of numeric 
types and returns a double.\n"
+      + "Any pair with a NULL is ignored.\n"
+      + "If applied to an empty set: NULL is returned.\n"
+      + "Otherwise, it computes the following:\n"
+      + "   SUM(y*y)-SUM(y)*SUM(y)/N\n")
+  public static class RegrSYY extends AbstractGenericUDAFResolver { // resolver behind regr_syy(y,x)
+
+    @Override
+    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws 
SemanticException {
+      checkArgumentTypes(parameters); // throws unless exactly two accepted primitive arguments are given
+      return new Evaluator();
+    }
+
+    private static class Evaluator extends GenericUDAFVarianceEvaluator { // variance evaluator fed with y only
+
+      @Override
+      public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws 
HiveException {
+        switch (m) {
+        case COMPLETE: // in these two modes both argument inspectors are indexed, so init expects (y, x)
+        case PARTIAL1:
+          return super.init(m, new ObjectInspector[] { parameters[0] }); // parent only sees the first argument (y)
+        default:
+          return super.init(m, parameters); // remaining modes: inspectors are handed to the parent untouched
+        }
+      }
+
+      @Override
+      public void iterate(AggregationBuffer agg, Object[] parameters) throws 
HiveException {
+        if (parameters[0] == null || parameters[1] == null)
+          return; // a pair with a NULL on either side is ignored
+        super.iterate(agg, new Object[] { parameters[0] }); // aggregate the y value of the pair
+      }
+
+      @Override
+      public Object terminate(AggregationBuffer agg) throws HiveException {
+        StdAgg myagg = (StdAgg) agg;
+        if (myagg.count == 0) {
+          return null; // no complete pair was seen: result is NULL
+        } else {
+          DoubleWritable result = getResult();
+          result.set(myagg.variance); // buffer value returned as-is, no division by count here (presumably the sum of squared deviations - confirm in the parent)
+          return result;
+        }
+      }
+    }
+  }
+
+  @Description(name = "regr_avgx", value = "_FUNC_(y,x) - evaluates the 
average of the independent variable", extended = "The function takes as 
arguments any pair of numeric types and returns a double.\n"
+      + "Any pair with a NULL is ignored.\n"
+      + "If applied to an empty set: NULL is returned.\n"
+      + "Otherwise, it computes the following:\n"
+      + "   AVG(X)")
+  public static class RegrAvgX extends AbstractGenericUDAFResolver { // resolver behind regr_avgx(y,x)
+
+    @Override
+    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws 
SemanticException {
+      checkArgumentTypes(parameters); // guarantees both entries are primitive, so the cast below is safe
+      if (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory() == 
PrimitiveCategory.DECIMAL) {
+        return new EvaluatorDecimal(); // x is DECIMAL: use the decimal-based average evaluator
+      } else {
+        return new EvaluatorDouble(); // any other accepted type: use the double-based average evaluator
+      }
+    }
+
+    private static class EvaluatorDouble extends 
GenericUDAFAverageEvaluatorDouble {
+
+      @Override
+      public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws 
HiveException {
+        switch (m) {
+        case COMPLETE: // in these two modes both argument inspectors are indexed, so init expects (y, x)
+        case PARTIAL1:
+          return super.init(m, new ObjectInspector[] { parameters[1] }); // parent only sees the second argument (x)
+        default:
+          return super.init(m, parameters); // remaining modes: inspectors are handed to the parent untouched
+        }
+      }
+
+      @Override
+      public void iterate(AggregationBuffer agg, Object[] parameters) throws 
HiveException {
+        if (parameters[0] == null || parameters[1] == null)
+          return; // a pair with a NULL on either side is ignored
+        super.iterate(agg, new Object[] { parameters[1] }); // average over the x value of the pair
+      }
+    }
+
+    private static class EvaluatorDecimal extends 
GenericUDAFAverageEvaluatorDecimal {
+
+      @Override
+      public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws 
HiveException {
+        switch (m) {
+        case COMPLETE: // in these two modes both argument inspectors are indexed, so init expects (y, x)
+        case PARTIAL1:
+          return super.init(m, new ObjectInspector[] { parameters[1] }); // parent only sees the second argument (x)
+        default:
+          return super.init(m, parameters); // remaining modes: inspectors are handed to the parent untouched
+        }
+      }
+
+      @Override
+      public void iterate(AggregationBuffer agg, Object[] parameters) throws 
HiveException {
+        if (parameters[0] == null || parameters[1] == null)
+          return; // a pair with a NULL on either side is ignored
+        super.iterate(agg, new Object[] { parameters[1] }); // average over the x value of the pair
+      }
+    }
+  }
+
+  @Description(name = "regr_avgy", value = "_FUNC_(y,x) - evaluates the 
average of the dependent variable", extended = "The function takes as arguments 
any pair of numeric types and returns a double.\n"
+      + "Any pair with a NULL is ignored.\n"
+      + "If applied to an empty set: NULL is returned.\n"
+      + "Otherwise, it computes the following:\n"
+      + "   AVG(Y)")
+  public static class RegrAvgY extends AbstractGenericUDAFResolver { // resolver behind regr_avgy(y,x)
+
+    @Override
+    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws 
SemanticException {
+      checkArgumentTypes(parameters); // guarantees both entries are primitive, so the cast below is safe
+      if (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory() == 
PrimitiveCategory.DECIMAL) {
+        return new EvaluatorDecimal(); // y is DECIMAL: use the decimal-based average evaluator
+      } else {
+        return new EvaluatorDouble(); // any other accepted type: use the double-based average evaluator
+      }
+    }
+
+    private static class EvaluatorDouble extends 
GenericUDAFAverageEvaluatorDouble {
+
+      @Override
+      public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws 
HiveException {
+        switch (m) {
+        case COMPLETE: // in these two modes both argument inspectors are indexed, so init expects (y, x)
+        case PARTIAL1:
+          return super.init(m, new ObjectInspector[] { parameters[0] }); // parent only sees the first argument (y)
+        default:
+          return super.init(m, parameters); // remaining modes: inspectors are handed to the parent untouched
+        }
+      }
+
+      @Override
+      public void iterate(AggregationBuffer agg, Object[] parameters) throws 
HiveException {
+        if (parameters[0] == null || parameters[1] == null)
+          return; // a pair with a NULL on either side is ignored
+        super.iterate(agg, new Object[] { parameters[0] }); // average over the y value of the pair
+      }
+    }
+
+    private static class EvaluatorDecimal extends 
GenericUDAFAverageEvaluatorDecimal {
+
+      @Override
+      public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws 
HiveException {
+        switch (m) {
+        case COMPLETE: // in these two modes both argument inspectors are indexed, so init expects (y, x)
+        case PARTIAL1:
+          return super.init(m, new ObjectInspector[] { parameters[0] }); // parent only sees the first argument (y)
+        default:
+          return super.init(m, parameters); // remaining modes: inspectors are handed to the parent untouched
+        }
+      }
+
+      @Override
+      public void iterate(AggregationBuffer agg, Object[] parameters) throws 
HiveException {
+        if (parameters[0] == null || parameters[1] == null)
+          return; // a pair with a NULL on either side is ignored
+        super.iterate(agg, new Object[] { parameters[0] }); // average over the y value of the pair
+      }
+    }
+  }
+
+  @Description(name = "regr_slope", value = "_FUNC_(y,x) - returns the slope 
of the linear regression line", extended = "The function takes as arguments any 
pair of numeric types and returns a double.\n"
+      + "Any pair with a NULL is ignored.\n"
+      + "If applied to an empty set: NULL is returned.\n"
+      + "If N*SUM(x*x) = SUM(x)*SUM(x): NULL is returned (the fit would be a 
vertical).\n"
+      + "Otherwise, it computes the following:\n"
+      + "   (N*SUM(x*y)-SUM(x)*SUM(y)) / (N*SUM(x*x)-SUM(x)*SUM(x))")
+  public static class RegrSlope extends AbstractGenericUDAFResolver { // resolver behind regr_slope(y,x)
+
+    @Override
+    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws 
SemanticException {
+      checkArgumentTypes(parameters); // throws unless exactly two accepted primitive arguments are given
+      return new Evaluator();
+    }
+
+    /**
+     * NOTE: corr is declared as corr(x,y) instead of corr(y,x), so in the inherited
+     * buffer the y* fields presumably describe this function's x argument (and the
+     * x* fields its y argument) - confirm against GenericUDAFCorrelationEvaluator.
+     */
+    private static class Evaluator extends GenericUDAFCorrelationEvaluator {
+
+      @Override
+      public Object terminate(AggregationBuffer agg) throws HiveException {
+        StdAgg myagg = (StdAgg) agg;
+
+        if (myagg.count < 2 || myagg.yvar == 0.0d) { // fewer than two pairs, or a zero divisor: no slope
+          return null;
+        } else {
+          getResult().set(myagg.covar / myagg.yvar); // slope = covar / yvar
+          return getResult();
+        }
+      }
+    }
+  }
+
+  @Description(name = "regr_r2", value = "_FUNC_(y,x) - returns the 
coefficient of determination (also called R-squared or goodness of fit) for the 
regression line.", extended = "The function takes as arguments any pair of 
numeric types and returns a double.\n"
+      + "Any pair with a NULL is ignored.\n"
+      + "If applied to an empty set: NULL is returned.\n"
+      + "If N*SUM(x*x) = SUM(x)*SUM(x): NULL is returned.\n"
+      + "If N*SUM(y*y) = SUM(y)*SUM(y): 1 is returned.\n"
+      + "Otherwise, it computes the following:\n"
+      + "   POWER( N*SUM(x*y)-SUM(x)*SUM(y) ,2)  /  ( 
(N*SUM(x*x)-SUM(x)*SUM(x)) * (N*SUM(y*y)-SUM(y)*SUM(y)) )")
+  public static class RegrR2 extends AbstractGenericUDAFResolver { // resolver behind regr_r2(y,x)
+
+    @Override
+    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws 
SemanticException {
+      checkArgumentTypes(parameters); // throws unless exactly two accepted primitive arguments are given
+      return new Evaluator();
+    }
+
+    /**
+     * NOTE: corr is declared as corr(x,y) instead of corr(y,x), so in the inherited
+     * buffer the y* fields presumably describe this function's x argument (and the
+     * x* fields its y argument) - confirm against GenericUDAFCorrelationEvaluator.
+     */
+    private static class Evaluator extends GenericUDAFCorrelationEvaluator {
+
+      @Override
+      public Object terminate(AggregationBuffer agg) throws HiveException {
+        StdAgg myagg = (StdAgg) agg;
+
+        if (myagg.count < 2 || myagg.yvar == 0.0d) { // fewer than two pairs, or yvar is zero: NULL
+          return null;
+        }
+        DoubleWritable result = getResult();
+        if (myagg.xvar == 0.0d) {
+          result.set(1.0d); // xvar is zero while yvar is not: the result is fixed to 1
+        } else {
+          result.set(myagg.covar * myagg.covar / myagg.yvar / myagg.xvar); // covar^2 / (yvar * xvar)
+        }
+        return result;
+      }
+    }
+  }
+
+  @Description(name = "regr_sxy", value = "_FUNC_(y,x) - return a value that 
can be used to evaluate the statistical validity of a regression model.", 
extended = "The function takes as arguments any pair of numeric types and 
returns a double.\n"
+      + "Any pair with a NULL is ignored.\n"
+      + "If applied to an empty set: NULL is returned.\n"
+      + "If N*SUM(x*x) = SUM(x)*SUM(x): NULL is returned.\n"
+      + "Otherwise, it computes the following:\n"
+      + "   SUM(x*y)-SUM(x)*SUM(y)/N")
+  public static class RegrSXY extends AbstractGenericUDAFResolver { // resolver behind regr_sxy(y,x)
+
+    @Override
+    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws 
SemanticException {
+      checkArgumentTypes(parameters); // throws unless exactly two accepted primitive arguments are given
+      return new Evaluator();
+    }
+
+    /**
+     * NOTE: corr is declared as corr(x,y) instead of corr(y,x)
+     *
+     * NOTE(review): the function description also promises NULL when
+     * N*SUM(x*x) = SUM(x)*SUM(x); terminate() has no such check - confirm which
+     * of the two is intended.
+     */
+    private static class Evaluator extends GenericUDAFCorrelationEvaluator {
+
+      @Override
+      public Object terminate(AggregationBuffer agg) throws HiveException {
+        StdAgg myagg = (StdAgg) agg;
+
+        if (myagg.count == 0) { // no complete pair was seen: NULL
+          return null;
+        }
+        DoubleWritable result = getResult();
+        result.set(myagg.covar); // the buffer's covar value is returned unchanged
+        return result;
+      }
+    }
+  }
+
+  @Description(name = "regr_intercept", value = "_FUNC_(y,x) - returns the 
y-intercept of the regression line.", extended = "The function takes as 
arguments any pair of numeric types and returns a double.\n"
+      + "Any pair with a NULL is ignored.\n"
+      + "If applied to an empty set: NULL is returned.\n"
+      + "If N*SUM(x*x) = SUM(x)*SUM(x): NULL is returned.\n"
+      + "Otherwise, it computes the following:\n"
+      + "   ( SUM(y)*SUM(x*x)-SUM(X)*SUM(x*y) )  /  ( N*SUM(x*x)-SUM(x)*SUM(x) 
)")
+  public static class RegrIntercept extends AbstractGenericUDAFResolver {
+
+    /**
+     * Validates the two arguments and returns the evaluator computing the y-intercept.
+     *
+     * @param parameters type information of the call arguments, expected as (y, x)
+     * @return a fresh intercept evaluator
+     * @throws SemanticException if the argument count or types are not accepted
+     */
+    @Override
+    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+      checkArgumentTypes(parameters);
+      return new Evaluator();
+    }
+
+    /**
+     * NOTE: corr is declared as corr(x,y) instead of corr(y,x), so in the inherited
+     * buffer the y* fields presumably describe this function's x argument (and the
+     * x* fields its y argument) - confirm against GenericUDAFCorrelationEvaluator.
+     */
+    private static class Evaluator extends GenericUDAFCorrelationEvaluator {
+
+      /**
+       * Computes xavg - (covar / yvar) * yavg from the aggregation buffer.
+       *
+       * @return the intercept, or null when fewer than two pairs were aggregated or
+       *         the divisor (yvar) is zero
+       */
+      @Override
+      public Object terminate(AggregationBuffer agg) throws HiveException {
+        StdAgg myagg = (StdAgg) agg;
+
+        // Same guard as regr_slope: with a zero divisor the slope would be 0/0 and the
+        // function would return NaN, while its description documents NULL for
+        // N*SUM(x*x) = SUM(x)*SUM(x).
+        if (myagg.count < 2 || myagg.yvar == 0.0d) {
+          return null;
+        }
+        DoubleWritable result = getResult();
+        double slope = myagg.covar / myagg.yvar;
+        result.set(myagg.xavg - slope * myagg.yavg);
+        return result;
+      }
+    }
+  }
+
+  private static void checkArgumentTypes(TypeInfo[] parameters) throws 
UDFArgumentTypeException { // shared argument validation for all regr_* resolvers in this file
+    if (parameters.length != 2) {
+      throw new UDFArgumentTypeException(parameters.length - 1, // reports the index of the last supplied argument
+          "Exactly two arguments are expected.");
+    }
+
+    if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { // first argument must be primitive
+      throw new UDFArgumentTypeException(0, "Only primitive type arguments are 
accepted but "
+          + parameters[0].getTypeName() + " is passed.");
+    }
+
+    if (parameters[1].getCategory() != ObjectInspector.Category.PRIMITIVE) { // second argument must be primitive
+      throw new UDFArgumentTypeException(1, "Only primitive type arguments are 
accepted but "
+          + parameters[1].getTypeName() + " is passed.");
+    }
+
+    if (!acceptedPrimitiveCategory(((PrimitiveTypeInfo) 
parameters[0]).getPrimitiveCategory())) { // casts are safe: both were checked to be PRIMITIVE above
+      throw new UDFArgumentTypeException(0, "Only numeric type arguments are 
accepted but "
+          + parameters[0].getTypeName() + " is passed.");
+
+    }
+    if (!acceptedPrimitiveCategory(((PrimitiveTypeInfo) 
parameters[1]).getPrimitiveCategory())) {
+      throw new UDFArgumentTypeException(1, "Only numeric type arguments are 
accepted but "
+          + parameters[1].getTypeName() + " is passed.");
+    }
+  }
+
+  private static boolean acceptedPrimitiveCategory(PrimitiveCategory 
primitiveCategory) { // true for the primitive categories the regr_* functions accept
+    switch (primitiveCategory) {
+    case BYTE:
+    case SHORT:
+    case INT:
+    case LONG:
+    case FLOAT:
+    case DOUBLE:
+    case TIMESTAMP: // note: accepted here although the error messages speak of "numeric" types
+    case DECIMAL:
+      return true;
+    default:
+      return false; // every other category is rejected
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/0e62d3dc/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFBinarySetFunctions.java
----------------------------------------------------------------------
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFBinarySetFunctions.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFBinarySetFunctions.java
new file mode 100644
index 0000000..584caf1
--- /dev/null
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFBinarySetFunctions.java
@@ -0,0 +1,416 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import static 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.LongWritable;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import jersey.repackaged.com.google.common.collect.Lists;
+
+@RunWith(Parameterized.class)
+public class TestGenericUDAFBinarySetFunctions {
+
+  private List<Object[]> rowSet;
+
+  @Parameters(name = "{0}") // the label (first array element) names each parameterized run
+  public static List<Object[]> getParameters() {
+    List<Object[]> ret = new ArrayList<>(); // each entry is { label, generated row set }
+    ret.add(new Object[] { "seq/seq", RowSetGenerator.generate(10,
+        new RowSetGenerator.DoubleSequence(0), new 
RowSetGenerator.DoubleSequence(0)) });
+    ret.add(new Object[] { "seq/ones", RowSetGenerator.generate(10,
+        new RowSetGenerator.DoubleSequence(0), new 
RowSetGenerator.ConstantSequence(1.0)) });
+    ret.add(new Object[] { "ones/seq", RowSetGenerator.generate(10,
+        new RowSetGenerator.ConstantSequence(1.0), new 
RowSetGenerator.DoubleSequence(0)) });
+    ret.add(new Object[] { "empty", RowSetGenerator.generate(0, // zero rows
+        new RowSetGenerator.DoubleSequence(0), new 
RowSetGenerator.DoubleSequence(0)) });
+    ret.add(new Object[] { "lonely", RowSetGenerator.generate(1, // a single row
+        new RowSetGenerator.DoubleSequence(0), new 
RowSetGenerator.DoubleSequence(0)) });
+    ret.add(new Object[] { "seq/seq+10", RowSetGenerator.generate(10,
+        new RowSetGenerator.DoubleSequence(0), new 
RowSetGenerator.DoubleSequence(10)) });
+    ret.add(new Object[] { "seq/null", RowSetGenerator.generate(10, // second column is always null
+        new RowSetGenerator.DoubleSequence(0), new 
RowSetGenerator.ConstantSequence(null)) });
+    ret.add(new Object[] { "null/seq0", RowSetGenerator.generate(10, // first column is always null
+        new RowSetGenerator.ConstantSequence(null), new 
RowSetGenerator.DoubleSequence(0)) });
+    return ret;
+  }
+
+  public static class GenericUDAFExecutor { // drives one UDAF over a row set through several evaluation-mode pipelines
+
+    private GenericUDAFResolver2 evaluatorFactory;
+    private GenericUDAFParameterInfo info;
+    private ObjectInspector[] partialOIs; // inspector of the PARTIAL1 output, used to init the merging modes
+
+    public GenericUDAFExecutor(GenericUDAFResolver2 evaluatorFactory, 
GenericUDAFParameterInfo info)
+        throws Exception {
+      this.evaluatorFactory = evaluatorFactory;
+      this.info = info;
+
+      GenericUDAFEvaluator eval0 = evaluatorFactory.getEvaluator(info); // throw-away evaluator, only used to obtain the partial inspector
+      partialOIs = new ObjectInspector[] {
+          eval0.init(GenericUDAFEvaluator.Mode.PARTIAL1, 
info.getParameterObjectInspectors()) };
+
+    }
+
+    List<Object> run(List<Object[]> values) throws Exception { // returns the results of the three pipelines, in this order
+      Object r1 = runComplete(values);
+      Object r2 = runPartialFinal(values);
+      Object r3 = runPartial2Final(values);
+      return Lists.newArrayList(r1, r2, r3);
+    }
+
+    private Object runComplete(List<Object[]> values) throws 
SemanticException, HiveException { // single evaluator in COMPLETE mode: iterate all rows, then terminate
+      GenericUDAFEvaluator eval = evaluatorFactory.getEvaluator(info);
+      eval.init(GenericUDAFEvaluator.Mode.COMPLETE, 
info.getParameterObjectInspectors());
+      AggregationBuffer agg = eval.getNewAggregationBuffer();
+      for (Object[] parameters : values) {
+        eval.iterate(agg, parameters);
+      }
+      return eval.terminate(agg);
+    }
+
+    private Object runPartialFinal(List<Object[]> values) throws Exception { // PARTIAL1 batches merged by a FINAL evaluator
+      GenericUDAFEvaluator eval = evaluatorFactory.getEvaluator(info);
+      eval.init(GenericUDAFEvaluator.Mode.FINAL, partialOIs);
+      AggregationBuffer buf = eval.getNewAggregationBuffer();
+      for (Object partialResult : runPartial1(values)) {
+        eval.merge(buf, partialResult);
+      }
+      return eval.terminate(buf);
+    }
+
+    private Object runPartial2Final(List<Object[]> values) throws Exception { // PARTIAL1 -> PARTIAL2 -> FINAL
+      GenericUDAFEvaluator eval = evaluatorFactory.getEvaluator(info);
+      eval.init(GenericUDAFEvaluator.Mode.FINAL, partialOIs);
+      AggregationBuffer buf = eval.getNewAggregationBuffer();
+      for (Object partialResult : runPartial2(runPartial1(values))) {
+        eval.merge(buf, partialResult);
+      }
+      return eval.terminate(buf);
+    }
+
+    private List<Object> runPartial1(List<Object[]> values) throws Exception { // splits the rows into batches, one PARTIAL1 result per batch
+      List<Object> ret = new ArrayList<>();
+      int batchSize = 1; // first batch takes batchSize - 1 = 0 rows, i.e. it yields an empty partial
+      Iterator<Object[]> iter = values.iterator();
+      do {
+        GenericUDAFEvaluator eval = evaluatorFactory.getEvaluator(info);
+        eval.init(GenericUDAFEvaluator.Mode.PARTIAL1, 
info.getParameterObjectInspectors());
+        AggregationBuffer buf = eval.getNewAggregationBuffer();
+        for (int i = 0; i < batchSize - 1 && iter.hasNext(); i++) {
+          eval.iterate(buf, iter.next());
+        }
+        batchSize <<= 1; // batch size doubles every round
+        ret.add(eval.terminatePartial(buf));
+
+        // back-check to force at least 1 output; and this should have a 
partial which is empty
+      } while (iter.hasNext());
+      return ret;
+    }
+
+    private List<Object> runPartial2(List<Object> values) throws Exception { // merges partial results in batches, one PARTIAL2 result per batch
+      List<Object> ret = new ArrayList<>();
+      int batchSize = 1; // first batch takes batchSize - 1 = 0 partials, i.e. it yields an empty partial
+      Iterator<Object> iter = values.iterator();
+      do {
+        GenericUDAFEvaluator eval = evaluatorFactory.getEvaluator(info);
+        eval.init(GenericUDAFEvaluator.Mode.PARTIAL2, partialOIs);
+        AggregationBuffer buf = eval.getNewAggregationBuffer();
+        for (int i = 0; i < batchSize - 1 && iter.hasNext(); i++) {
+          eval.merge(buf, iter.next());
+        }
+        batchSize <<= 1; // batch size doubles every round
+        ret.add(eval.terminatePartial(buf));
+
+        // back-check to force at least 1 output; and this should have a 
partial which is empty
+      } while (iter.hasNext());
+      return ret;
+    }
+  }
+
+  public static class RowSetGenerator { // builds row sets column by column from per-field generators
+    public static interface FieldGenerator {
+      public Object apply(int rowIndex); // value of this field for the given zero-based row index
+    }
+
+    public static class ConstantSequence implements FieldGenerator { // yields the same value (possibly null) for every row
+      private Object constant;
+
+      public ConstantSequence(Object constant) {
+        this.constant = constant;
+      }
+
+      @Override
+      public Object apply(int rowIndex) {
+        return constant; // rowIndex is ignored
+      }
+    }
+
+    public static class DoubleSequence implements FieldGenerator { // yields offset, offset + 1, offset + 2, ...
+
+      private int offset;
+
+      public DoubleSequence(int offset) {
+        this.offset = offset;
+      }
+
+      @Override
+      public Object apply(int rowIndex) {
+        double d = rowIndex + offset;
+        return d; // boxed to Double because the return type is Object
+      }
+    }
+
+    public static List<Object[]> generate(int numRows, FieldGenerator... 
generators) { // one Object[] per row, one element per generator, in generator order
+      ArrayList<Object[]> ret = new ArrayList<>(numRows);
+      for (int rowIdx = 0; rowIdx < numRows; rowIdx++) {
+        ArrayList<Object> row = new ArrayList<>();
+        for (FieldGenerator g : generators) {
+          row.add(g.apply(rowIdx));
+        }
+        ret.add(row.toArray());
+      }
+      return ret; // empty list when numRows is 0
+    }
+  }
+
+  public TestGenericUDAFBinarySetFunctions(String label, List<Object[]> 
rowSet) { // label is only used for the run name and is not stored
+    this.rowSet = rowSet;
+  }
+
+  @Test // checks regr_count against the reference value computed by RegrIntermediate for the current row set
+  public void regr_count() throws Exception {
+    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
+    validateUDAF(expected.count(), new 
GenericUDAFBinarySetFunctions.RegrCount());
+  }
+
+  @Test // checks regr_sxx against the reference value computed by RegrIntermediate for the current row set
+  public void regr_sxx() throws Exception {
+    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
+    validateUDAF(expected.sxx(), new GenericUDAFBinarySetFunctions.RegrSXX());
+  }
+
+  @Test // checks regr_syy against the reference value computed by RegrIntermediate for the current row set
+  public void regr_syy() throws Exception {
+    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
+    validateUDAF(expected.syy(), new GenericUDAFBinarySetFunctions.RegrSYY());
+  }
+
+  @Test // checks regr_sxy against the reference value computed by RegrIntermediate for the current row set
+  public void regr_sxy() throws Exception {
+    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
+    validateUDAF(expected.sxy(), new GenericUDAFBinarySetFunctions.RegrSXY());
+  }
+
+  @Test // checks regr_avgx against the reference value computed by RegrIntermediate for the current row set
+  public void regr_avgx() throws Exception {
+    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
+    validateUDAF(expected.avgx(), new 
GenericUDAFBinarySetFunctions.RegrAvgX());
+  }
+
+  @Test // checks regr_avgy against the reference value computed by RegrIntermediate for the current row set
+  public void regr_avgy() throws Exception {
+    RegrIntermediate expected = RegrIntermediate.computeFor(rowSet);
+    validateUDAF(expected.avgy(), new 
GenericUDAFBinarySetFunctions.RegrAvgY());
+  }
+
+  /** Compares the RegrSlope UDAF with the reference {@code slope()} value. */
+  @Test
+  public void regr_slope() throws Exception {
+    Double expectedSlope = RegrIntermediate.computeFor(rowSet).slope();
+    validateUDAF(expectedSlope, new GenericUDAFBinarySetFunctions.RegrSlope());
+  }
+
+  /** Compares the RegrR2 UDAF with the reference {@code r2()} value. */
+  @Test
+  public void regr_r2() throws Exception {
+    Double expectedR2 = RegrIntermediate.computeFor(rowSet).r2();
+    validateUDAF(expectedR2, new GenericUDAFBinarySetFunctions.RegrR2());
+  }
+
+  /** Compares the RegrIntercept UDAF with the reference {@code intercept()} value. */
+  @Test
+  public void regr_intercept() throws Exception {
+    Double expectedIntercept = RegrIntermediate.computeFor(rowSet).intercept();
+    validateUDAF(expectedIntercept, new GenericUDAFBinarySetFunctions.RegrIntercept());
+  }
+
+  /** Compares GenericUDAFCorrelation with the reference {@code corr()} value; currently ignored. */
+  @Test
+  @Ignore("HIVE-16178 should fix this")
+  public void corr() throws Exception {
+    Double expectedCorr = RegrIntermediate.computeFor(rowSet).corr();
+    validateUDAF(expectedCorr, new GenericUDAFCorrelation());
+  }
+
+  /** Compares GenericUDAFCovariance with the reference {@code covar_pop()} value. */
+  @Test
+  public void covar_pop() throws Exception {
+    Double expectedCovarPop = RegrIntermediate.computeFor(rowSet).covar_pop();
+    validateUDAF(expectedCovarPop, new GenericUDAFCovariance());
+  }
+
+  /** Compares GenericUDAFCovarianceSample with the reference {@code covar_samp()} value; currently ignored. */
+  @Test
+  @Ignore("HIVE-16178 should fix this")
+  public void covar_samp() throws Exception {
+    Double expectedCovarSamp = RegrIntermediate.computeFor(rowSet).covar_samp();
+    validateUDAF(expectedCovarSamp, new GenericUDAFCovarianceSample());
+  }
+
+  /**
+   * Runs {@code udaf} over {@code rowSet} with two double-typed arguments and
+   * checks every value returned by the executor against {@code expectedResult}.
+   *
+   * @param expectedResult expected aggregate; {@code null} means every returned value must be null
+   * @param udaf           resolver of the aggregate function under test
+   */
+  private void validateUDAF(Double expectedResult, GenericUDAFResolver2 udaf) throws Exception {
+    ObjectInspector[] params = { javaDoubleObjectInspector, javaDoubleObjectInspector };
+    GenericUDAFParameterInfo gpi =
+        new SimpleGenericUDAFParameterInfo(params, false, false, false);
+    List<Object> values = new GenericUDAFExecutor(udaf, gpi).run(rowSet);
+
+    for (Object v : values) {
+      if (expectedResult == null) {
+        assertNull(v);
+        continue;
+      }
+      // Anything that is not a DoubleWritable is treated as a LongWritable.
+      double actual;
+      if (v instanceof DoubleWritable) {
+        actual = ((DoubleWritable) v).get();
+      } else {
+        actual = ((LongWritable) v).get();
+      }
+      assertEquals(expectedResult, actual, 1e-10);
+    }
+  }
+
+  /**
+   * Reference calculator for the binary set functions, built on plain running
+   * sums. The tests use it to derive the expected value for each UDAF.
+   * Methods return {@code null} where the statistic is treated as undefined.
+   */
+  static class RegrIntermediate {
+    public double sum_x2, sum_y2;
+    public double sum_x, sum_y;
+    public double sum_xy;
+    public double n;
+
+    /** Accumulates one (y, x) pair; a pair with a null member is skipped entirely. */
+    public void add(Double y, Double x) {
+      if (x == null || y == null) {
+        return;
+      }
+      sum_x2 += x * x;
+      sum_y2 += y * y;
+      sum_x += x;
+      sum_y += y;
+      sum_xy += x * y;
+      n++;
+    }
+
+    /**
+     * Intercept of the least-squares line, or {@code null} when no pair was
+     * accumulated or x has no spread.
+     */
+    public Double intercept() {
+      double xx = n * sum_x2 - sum_x * sum_x;
+      // Without the xx guard the division below yields NaN/Infinity; slope()
+      // already reports null for the same degenerate input.
+      if (n == 0 || xx == 0.0d) {
+        return null;
+      }
+      return (sum_y * sum_x2 - sum_x * sum_xy) / xx;
+    }
+
+    /** Sum of cross products of deviations, or {@code null} when no pair was accumulated. */
+    public Double sxy() {
+      if (n == 0) {
+        return null;
+      }
+      return sum_xy - sum_x * sum_y / n;
+    }
+
+    /** Population covariance, or {@code null} when no pair was accumulated. */
+    public Double covar_pop() {
+      if (n == 0) {
+        return null;
+      }
+      return (sum_xy - sum_x * sum_y / n) / n;
+    }
+
+    /** Sample covariance, or {@code null} when fewer than two pairs were accumulated. */
+    public Double covar_samp() {
+      if (n <= 1) {
+        return null;
+      }
+      return (sum_xy - sum_x * sum_y / n) / (n - 1);
+    }
+
+    /**
+     * Square root of the squared correlation (hence never negative), or
+     * {@code null} when no pair was accumulated or either variable has no spread.
+     */
+    public Double corr() {
+      double xx = n * sum_x2 - sum_x * sum_x;
+      double yy = n * sum_y2 - sum_y * sum_y;
+      if (n == 0 || xx == 0.0d || yy == 0.0d) {
+        return null;
+      }
+      double c = n * sum_xy - sum_x * sum_y;
+      return Math.sqrt(c * c / xx / yy);
+    }
+
+    /**
+     * Coefficient of determination: {@code null} when no pair was accumulated
+     * or x has no spread, {@code 1.0} when y has no spread.
+     */
+    public Double r2() {
+      double xx = n * sum_x2 - sum_x * sum_x;
+      double yy = n * sum_y2 - sum_y * sum_y;
+      if (n == 0 || xx == 0.0d) {
+        return null;
+      }
+      if (yy == 0.0d) {
+        return 1.0d;
+      }
+      double c = n * sum_xy - sum_x * sum_y;
+      return c * c / xx / yy;
+    }
+
+    /** Slope of the least-squares line, or {@code null} when no pair was accumulated or x has no spread. */
+    public Double slope() {
+      if (n == 0 || n * sum_x2 == sum_x * sum_x) {
+        return null;
+      }
+      return (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x * sum_x);
+    }
+
+    /** Mean of x over the accumulated pairs, or {@code null} when there are none. */
+    public Double avgx() {
+      if (n == 0) {
+        return null;
+      }
+      return sum_x / n;
+    }
+
+    /** Mean of y over the accumulated pairs, or {@code null} when there are none. */
+    public Double avgy() {
+      if (n == 0) {
+        return null;
+      }
+      return sum_y / n;
+    }
+
+    /** Number of accumulated pairs; never {@code null}. */
+    public Double count() {
+      return n;
+    }
+
+    /** Sum of squared deviations of x, or {@code null} when no pair was accumulated. */
+    public Double sxx() {
+      if (n == 0) {
+        return null;
+      }
+      return sum_x2 - sum_x * sum_x / n;
+    }
+
+    /** Sum of squared deviations of y, or {@code null} when no pair was accumulated. */
+    public Double syy() {
+      if (n == 0) {
+        return null;
+      }
+      return sum_y2 - sum_y * sum_y / n;
+    }
+
+    /**
+     * Accumulates every row, reading element 0 as y and element 1 as x; both
+     * are cast to {@link Double}.
+     */
+    public static RegrIntermediate computeFor(List<Object[]> rows) {
+      RegrIntermediate ri = new RegrIntermediate();
+      for (Object[] objects : rows) {
+        ri.add((Double) objects[0], (Double) objects[1]);
+      }
+      return ri;
+    }
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/0e62d3dc/ql/src/test/queries/clientpositive/udaf_binarysetfunctions.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udaf_binarysetfunctions.q b/ql/src/test/queries/clientpositive/udaf_binarysetfunctions.q
new file mode 100644
index 0000000..2039312
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udaf_binarysetfunctions.q
@@ -0,0 +1,57 @@
+create table t (id int,px int,y decimal,x decimal);
+
+insert into t values (101,1,1,1);
+insert into t values (201,2,1,1);
+insert into t values (301,3,1,1);
+insert into t values (401,4,1,11);
+insert into t values (501,5,1,null);
+insert into t values (601,6,null,1);
+insert into t values (701,6,null,null);
+insert into t values (102,1,2,2);
+insert into t values (202,2,1,2);
+insert into t values (302,3,2,1);
+insert into t values (402,4,2,12);
+insert into t values (502,5,2,null);
+insert into t values (602,6,null,2);
+insert into t values (702,6,null,null);
+insert into t values (103,1,3,3);
+insert into t values (203,2,1,3);
+insert into t values (303,3,3,1);
+insert into t values (403,4,3,13);
+insert into t values (503,5,3,null);
+insert into t values (603,6,null,3);
+insert into t values (703,6,null,null);
+insert into t values (104,1,4,4);
+insert into t values (204,2,1,4);
+insert into t values (304,3,4,1);
+insert into t values (404,4,4,14);
+insert into t values (504,5,4,null);
+insert into t values (604,6,null,4);
+insert into t values (704,6,null,null);
+insert into t values (800,7,1,1);
+
+
+explain select px,var_pop(x),var_pop(y),corr(y,x),covar_samp(y,x),covar_pop(y,x),regr_count(y,x),regr_slope(y,x),
+regr_intercept(y,x), regr_r2(y,x), regr_sxx(y,x), regr_syy(y,x), regr_sxy(y,x), regr_avgx(y,x), regr_avgy(y,x), regr_count(y,x)
+ from t group by px order by px;
+
+select px,
+       round(  var_pop(x),5),
+       round(  var_pop(y),5),
+       round(  corr(y,x),5),
+       round(  covar_samp(y,x),5),
+       round(  covar_pop(y,x),5),
+       regr_count(y,x),
+       round(  regr_slope(y,x),5),
+       round(  regr_intercept(y,x),5),
+       round(  regr_r2(y,x),5),
+       round(  regr_sxx(y,x),5),
+       round(  regr_syy(y,x),5),
+       round(  regr_sxy(y,x),5),
+       round(  regr_avgx(y,x),5),
+       round(  regr_avgy(y,x),5),
+       round(  regr_count(y,x),5)
+ from t group by px order by px;
+
+
+select id,regr_count(y,x) over (partition by px) from t order by id;

http://git-wip-us.apache.org/repos/asf/hive/blob/0e62d3dc/ql/src/test/results/clientpositive/show_functions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index 1361b93..be58381 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -191,6 +191,15 @@ reflect2
 regexp
 regexp_extract
 regexp_replace
+regr_avgx
+regr_avgy
+regr_count
+regr_intercept
+regr_r2
+regr_slope
+regr_sxx
+regr_sxy
+regr_syy
 repeat
 replace
 replicate_rows
@@ -322,6 +331,7 @@ percentile
 posexplode
 positive
 regexp_replace
+regr_slope
 replace
 reverse
 rlike

http://git-wip-us.apache.org/repos/asf/hive/blob/0e62d3dc/ql/src/test/results/clientpositive/udaf_binarysetfunctions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udaf_binarysetfunctions.q.out b/ql/src/test/results/clientpositive/udaf_binarysetfunctions.q.out
new file mode 100644
index 0000000..9de3dd9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udaf_binarysetfunctions.q.out
@@ -0,0 +1,464 @@
+PREHOOK: query: create table t (id int,px int,y decimal,x decimal)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t (id int,px int,y decimal,x decimal)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: insert into t values (101,1,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (101,1,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
+PREHOOK: query: insert into t values (201,2,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (201,2,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
+PREHOOK: query: insert into t values (301,3,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (301,3,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
+PREHOOK: query: insert into t values (401,4,1,11)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (401,4,1,11)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
+PREHOOK: query: insert into t values (501,5,1,null)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (501,5,1,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
+PREHOOK: query: insert into t values (601,6,null,1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (601,6,null,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
+PREHOOK: query: insert into t values (701,6,null,null)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (701,6,null,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
+PREHOOK: query: insert into t values (102,1,2,2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (102,1,2,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
+PREHOOK: query: insert into t values (202,2,1,2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (202,2,1,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
+PREHOOK: query: insert into t values (302,3,2,1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (302,3,2,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (402,4,2,12)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (402,4,2,12)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (502,5,2,null)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (502,5,2,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (602,6,null,2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (602,6,null,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (702,6,null,null)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (702,6,null,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (103,1,3,3)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (103,1,3,3)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (203,2,1,3)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (203,2,1,3)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (303,3,3,1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (303,3,3,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (403,4,3,13)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (403,4,3,13)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (503,5,3,null)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (503,5,3,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (603,6,null,3)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (603,6,null,3)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__20)values__tmp__table__20.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__20)values__tmp__table__20.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__20)values__tmp__table__20.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__20)values__tmp__table__20.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (703,6,null,null)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (703,6,null,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__21)values__tmp__table__21.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__21)values__tmp__table__21.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__21)values__tmp__table__21.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__21)values__tmp__table__21.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (104,1,4,4)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (104,1,4,4)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__22)values__tmp__table__22.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__22)values__tmp__table__22.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__22)values__tmp__table__22.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__22)values__tmp__table__22.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (204,2,1,4)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (204,2,1,4)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__23)values__tmp__table__23.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__23)values__tmp__table__23.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__23)values__tmp__table__23.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__23)values__tmp__table__23.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (304,3,4,1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (304,3,4,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__24)values__tmp__table__24.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__24)values__tmp__table__24.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__24)values__tmp__table__24.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__24)values__tmp__table__24.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (404,4,4,14)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (404,4,4,14)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__25)values__tmp__table__25.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__25)values__tmp__table__25.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__25)values__tmp__table__25.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__25)values__tmp__table__25.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (504,5,4,null)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (504,5,4,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__26)values__tmp__table__26.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__26)values__tmp__table__26.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__26)values__tmp__table__26.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__26)values__tmp__table__26.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (604,6,null,4)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (604,6,null,4)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__27)values__tmp__table__27.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__27)values__tmp__table__27.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__27)values__tmp__table__27.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__27)values__tmp__table__27.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (704,6,null,null)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (704,6,null,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__28)values__tmp__table__28.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__28)values__tmp__table__28.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__28)values__tmp__table__28.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__28)values__tmp__table__28.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: insert into t values (800,7,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (800,7,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id EXPRESSION 
[(values__tmp__table__29)values__tmp__table__29.FieldSchema(name:tmp_values_col1,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.px EXPRESSION 
[(values__tmp__table__29)values__tmp__table__29.FieldSchema(name:tmp_values_col2,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.x EXPRESSION 
[(values__tmp__table__29)values__tmp__table__29.FieldSchema(name:tmp_values_col4,
 type:string, comment:), ]
+POSTHOOK: Lineage: t.y EXPRESSION 
[(values__tmp__table__29)values__tmp__table__29.FieldSchema(name:tmp_values_col3,
 type:string, comment:), ]
+PREHOOK: query: explain select 
px,var_pop(x),var_pop(y),corr(y,x),covar_samp(y,x),covar_pop(y,x),regr_count(y,x),regr_slope(y,x),
+regr_intercept(y,x), regr_r2(y,x), regr_sxx(y,x), regr_syy(y,x), 
regr_sxy(y,x), regr_avgx(y,x), regr_avgy(y,x), regr_count(y,x)
+ from t group by px order by px
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select 
px,var_pop(x),var_pop(y),corr(y,x),covar_samp(y,x),covar_pop(y,x),regr_count(y,x),regr_slope(y,x),
+regr_intercept(y,x), regr_r2(y,x), regr_sxx(y,x), regr_syy(y,x), 
regr_sxy(y,x), regr_avgx(y,x), regr_avgy(y,x), regr_count(y,x)
+ from t group by px order by px
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t
+            Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: px (type: int), x (type: decimal(10,0)), y (type: 
decimal(10,0))
+              outputColumnNames: px, x, y
+              Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: var_pop(x), var_pop(y), corr(y, x), 
covar_samp(y, x), covar_pop(y, x), regr_count(y, x), regr_slope(y, x), 
regr_intercept(y, x), regr_r2(y, x), regr_sxx(y, x), regr_syy(y, x), 
regr_sxy(y, x), regr_avgx(y, x), regr_avgy(y, x)
+                keys: px (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+                Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 29 Data size: 281 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: 
struct<count:bigint,sum:double,variance:double>), _col2 (type: 
struct<count:bigint,sum:double,variance:double>), _col3 (type: 
struct<count:bigint,xavg:double,yavg:double,xvar:double,yvar:double,covar:double>),
 _col4 (type: struct<count:bigint,xavg:double,yavg:double,covar:double>), _col5 
(type: struct<count:bigint,xavg:double,yavg:double,covar:double>), _col6 (type: 
bigint), _col7 (type: 
struct<count:bigint,xavg:double,yavg:double,xvar:double,yvar:double,covar:double>),
 _col8 (type: 
struct<count:bigint,xavg:double,yavg:double,xvar:double,yvar:double,covar:double>),
 _col9 (type: 
struct<count:bigint,xavg:double,yavg:double,xvar:double,yvar:double,covar:double>),
 _col10 (type: struct<count:bigint,sum:double,variance:double>), _col11 (type: 
struct<count:bigint,sum:double,variance:double>), _col12 (type: 
struct<count:bigint,xavg:double,yavg:double,xvar:double,yvar:double,covar:double>),
 _col13 (type: struct<count:bigint,sum:decimal(20,0),
 input:decimal(10,0)>), _col14 (type: 
struct<count:bigint,sum:decimal(20,0),input:decimal(10,0)>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: var_pop(VALUE._col0), var_pop(VALUE._col1), 
corr(VALUE._col2), covar_samp(VALUE._col3), covar_pop(VALUE._col4), 
regr_count(VALUE._col5), regr_slope(VALUE._col6), regr_intercept(VALUE._col7), 
regr_r2(VALUE._col8), regr_sxx(VALUE._col9), regr_syy(VALUE._col10), 
regr_sxy(VALUE._col11), regr_avgx(VALUE._col12), regr_avgy(VALUE._col13)
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+          Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column 
stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: double), _col10 
(type: double), _col11 (type: double), _col12 (type: double), _col13 (type: 
decimal(14,4)), _col14 (type: decimal(14,4)), _col2 (type: double), _col3 
(type: double), _col4 (type: double), _col5 (type: double), _col6 (type: 
bigint), _col7 (type: double), _col8 (type: double), _col9 (type: double)
+            outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, 
_col14, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+            Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col1 (type: double), _col2 (type: double), 
_col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: 
bigint), _col7 (type: double), _col8 (type: double), _col9 (type: double), 
_col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 
(type: decimal(14,4)), _col14 (type: decimal(14,4))
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: 
double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 
(type: double), VALUE._col4 (type: double), VALUE._col5 (type: bigint), 
VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: 
double), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col11 
(type: double), VALUE._col12 (type: decimal(14,4)), VALUE._col13 (type: 
decimal(14,4)), VALUE._col5 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+          Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select px,
+       round(  var_pop(x),5),
+       round(  var_pop(y),5),
+       round(  corr(y,x),5),
+       round(  covar_samp(y,x),5),
+       round(  covar_pop(y,x),5),
+       regr_count(y,x),
+       round(  regr_slope(y,x),5),
+       round(  regr_intercept(y,x),5),
+       round(  regr_r2(y,x),5),
+       round(  regr_sxx(y,x),5),
+       round(  regr_syy(y,x),5),
+       round(  regr_sxy(y,x),5),
+       round(  regr_avgx(y,x),5),
+       round(  regr_avgy(y,x),5),
+       round(  regr_count(y,x),5)
+ from t group by px order by px
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select        px,
+       round(  var_pop(x),5),
+       round(  var_pop(y),5),
+       round(  corr(y,x),5),
+       round(  covar_samp(y,x),5),
+       round(  covar_pop(y,x),5),
+       regr_count(y,x),
+       round(  regr_slope(y,x),5),
+       round(  regr_intercept(y,x),5),
+       round(  regr_r2(y,x),5),
+       round(  regr_sxx(y,x),5),
+       round(  regr_syy(y,x),5),
+       round(  regr_sxy(y,x),5),
+       round(  regr_avgx(y,x),5),
+       round(  regr_avgy(y,x),5),
+       round(  regr_count(y,x),5)
+ from t group by px order by px
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+1      1.25    1.25    1.0     1.66667 1.25    4       1.0     0.0     1.0     
5.0     5.0     5.0     2.50000 2.50000 4
+2      1.25    0.0     NaN     0.0     0.0     4       0.0     1.0     1.0     
5.0     0.0     0.0     2.50000 1.00000 4
+3      0.0     1.25    NaN     0.0     0.0     4       NULL    NaN     NULL    
0.0     5.0     0.0     1.00000 2.50000 4
+4      1.25    1.25    1.0     1.66667 1.25    4       1.0     -10.0   1.0     
5.0     5.0     5.0     12.50000        2.50000 4
+5      NULL    1.25    NULL    NULL    NULL    0       NULL    NULL    NULL    
NULL    NULL    NULL    NULL    NULL    0
+6      1.25    NULL    NULL    NULL    NULL    0       NULL    NULL    NULL    
NULL    NULL    NULL    NULL    NULL    0
+7      0.0     0.0     NULL    0.0     0.0     1       NULL    NaN     NULL    
0.0     0.0     0.0     1.00000 1.00000 1
+PREHOOK: query: select id,regr_count(y,x) over (partition by px) from t order 
by id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select id,regr_count(y,x) over (partition by px) from t order 
by id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+101    4
+102    4
+103    4
+104    4
+201    4
+202    4
+203    4
+204    4
+301    4
+302    4
+303    4
+304    4
+401    4
+402    4
+403    4
+404    4
+501    0
+502    0
+503    0
+504    0
+601    0
+602    0
+603    0
+604    0
+701    0
+702    0
+703    0
+704    0
+800    1

Reply via email to