[hive] branch master updated: HIVE-25653: Incorrect results returned by STDDEV, STDDEV_SAMP, STDDEV_POP for floating point data types (Ashish Sharma, reviewed by Adesh Rao, Sankar Hariappan)

2021-11-07 Thread sankarh
This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new d0f77cc  HIVE-25653: Incorrect results returned by STDDEV, 
STDDEV_SAMP, STDDEV_POP for floating point data types (Ashish Sharma, reviewed 
by Adesh Rao, Sankar Hariappan)
d0f77cc is described below

commit d0f77cca1a6612894837a174440a5fd929cd3bcb
Author: Ashish Kumar Sharma 
AuthorDate: Mon Nov 8 12:23:55 2021 +0530

HIVE-25653: Incorrect results returned by STDDEV, STDDEV_SAMP, STDDEV_POP 
for floating point data types (Ashish Sharma, reviewed by Adesh Rao, Sankar 
Hariappan)

Signed-off-by: Sankar Hariappan 
Closes (#2760)
---
 .../hadoop/hive/ql/udf/generic/GenericUDAFStd.java |   8 +-
 .../hive/ql/udf/generic/GenericUDAFVariance.java   |  29 --
 ql/src/test/queries/clientpositive/stddev.q |  14 +++
 .../clientpositive/llap/cbo_rp_windowing_2.q.out   |  42 -
 .../test/results/clientpositive/llap/stddev.q.out  | 102 +
 .../clientpositive/llap/vector_windowing.q.out |  42 -
 .../results/clientpositive/llap/windowing.q.out |  42 -
 7 files changed, 205 insertions(+), 74 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java
index 79b519c..729455c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java
@@ -27,6 +27,9 @@ import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
+import java.math.BigDecimal;
+import java.math.MathContext;
+
 /**
  * Compute the standard deviation by extending GenericUDAFVariance and
  * overriding the terminate() method of the evaluator.
@@ -90,7 +93,10 @@ public class GenericUDAFStd extends GenericUDAFVariance {
  * use it, etc.
  */
 public static double calculateStdResult(double variance, long count) {
-  return Math.sqrt(variance / count);
+  // TODO: BigDecimal.sqrt() is introduced in java 9. So change the below 
calculation once hive upgraded to java 9 or above.
+  BigDecimal bvariance = new BigDecimal(variance);
+  BigDecimal result = bvariance.divide(new BigDecimal(count), 
MathContext.DECIMAL128);
+  return Math.sqrt(result.doubleValue());
 }
 
 @Override
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
index bb55d88..5e60edc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.udf.generic;
 
+import java.math.BigDecimal;
+import java.math.MathContext;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map;
@@ -106,9 +108,14 @@ public class GenericUDAFVariance extends 
AbstractGenericUDAFResolver {
*/
   public static double calculateIntermediate(
   long count, double sum, double value, double variance) {
-double t = count * value - sum;
-variance += (t * t) / ((double) count * (count - 1));
-return variance;
+BigDecimal bcount,bsum,bvalue,bvariance;
+bvariance = new BigDecimal(variance);
+bsum = new BigDecimal(sum);
+bvalue = new BigDecimal(value);
+bcount = new BigDecimal(count);
+BigDecimal t = bcount.multiply(bvalue).subtract(bsum);
+bvariance = 
bvariance.add(t.multiply(t).divide(bcount.multiply(bcount.subtract(BigDecimal.ONE)),MathContext.DECIMAL128));
+return bvariance.doubleValue();
   }
 
   /*
@@ -120,14 +127,16 @@ public class GenericUDAFVariance extends 
AbstractGenericUDAFResolver {
   long partialCount, long mergeCount, double partialSum, double mergeSum,
   double partialVariance, double mergeVariance) {
 
-final double doublePartialCount = (double) partialCount;
-final double doubleMergeCount = (double) mergeCount;
+final BigDecimal bPartialCount = new BigDecimal(partialCount);
+final BigDecimal bMergeCount = new BigDecimal(mergeCount);
+BigDecimal bmergeVariance = new BigDecimal(mergeVariance);
 
-double t = (doublePartialCount / doubleMergeCount) * mergeSum - partialSum;
-mergeVariance +=
-partialVariance + ((doubleMergeCount / doublePartialCount) /
-(doubleMergeCount + doublePartialCount)) * t * t;
-return mergeVariance;
+BigDecimal t =
+bPartialCount.divide(bMergeCount, MathContext.DECIMAL128).multiply(new 
BigDecimal(mergeSum)).subtract(new BigDecimal(partialSum));
+
+bmergeVariance = 

[hive] branch master updated: HIVE-25659: Metastore direct sql queries with IN/(NOT IN) should be split based on max parameters allowed by SQL DB (Nikhil Gupta, reviewed by Adesh Rao, Sankar Hariappan)

2021-11-07 Thread sankarh
This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new aa7a903  HIVE-25659: Metastore direct sql queries with IN/(NOT IN) 
should be split based on max parameters allowed by SQL DB (Nikhil Gupta, 
reviewed by Adesh Rao, Sankar Hariappan)
aa7a903 is described below

commit aa7a9030ee4d457dd6da45db63a12ce7d972362a
Author: guptanikhil007 
AuthorDate: Mon Nov 8 11:21:35 2021 +0530

HIVE-25659: Metastore direct sql queries with IN/(NOT IN) should be split 
based on max parameters allowed by SQL DB (Nikhil Gupta, reviewed by Adesh Rao, 
Sankar Hariappan)

Signed-off-by: Sankar Hariappan 
Closes (#2758)
---
 .../hadoop/hive/metastore/conf/MetastoreConf.java  |  3 +++
 .../apache/hadoop/hive/metastore/txn/TxnUtils.java |  6 ++---
 .../hadoop/hive/metastore/txn/TestTxnUtils.java | 29 +++---
 3 files changed, 32 insertions(+), 6 deletions(-)

diff --git 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index 0e05ad3..21ea1f8 100644
--- 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++ 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -680,6 +680,9 @@ public class MetastoreConf {
 
DIRECT_SQL_MAX_ELEMENTS_VALUES_CLAUSE("metastore.direct.sql.max.elements.values.clause",
 "hive.direct.sql.max.elements.values.clause",
 1000, "The maximum number of values in a VALUES clause for INSERT 
statement."),
+DIRECT_SQL_MAX_PARAMETERS("metastore.direct.sql.max.parameters",
+"hive.direct.sql.max.parameters", 1000, "The maximum query parameters 
\n" +
+"backend sql engine can support."),
 DIRECT_SQL_MAX_QUERY_LENGTH("metastore.direct.sql.max.query.length",
 "hive.direct.sql.max.query.length", 100, "The maximum\n" +
 " size of a query string (in KB)."),
diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java
index f2c881a..13d45d1 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java
@@ -265,6 +265,7 @@ public class TxnUtils {
 // Get configuration parameters
 int maxQueryLength = MetastoreConf.getIntVar(conf, 
ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH);
 int batchSize = MetastoreConf.getIntVar(conf, 
ConfVars.DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE);
+int maxParameters = MetastoreConf.getIntVar(conf, 
ConfVars.DIRECT_SQL_MAX_PARAMETERS);
 
 // Check parameter set validity as a public method.
 if (inList == null || inList.size() == 0 || maxQueryLength <= 0 || 
batchSize <= 0) {
@@ -316,7 +317,7 @@ public class TxnUtils {
   // Compute the size of a query when the 'nextValue' is added to the 
current query.
   int querySize = querySizeExpected(buf.length(), nextValue.length(), 
suffix.length(), addParens);
 
-  if (querySize > maxQueryLength * 1024) {
+  if ((querySize > maxQueryLength * 1024) || (currentCount >= 
maxParameters)) {
 // Check an edge case where the DIRECT_SQL_MAX_QUERY_LENGTH does not 
allow one 'IN' clause with single value.
 if (cursor4queryOfInClauses == 1 && cursor4InClauseElements == 0) {
   throw new IllegalArgumentException("The current " + 
ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH.getVarname() + " is set too small to have 
one IN clause with single value!");
@@ -351,9 +352,8 @@ public class TxnUtils {
 continue;
   } else if (cursor4InClauseElements >= batchSize-1 && 
cursor4InClauseElements != 0) {
 // Finish the current 'IN'/'NOT IN' clause and start a new clause.
-buf.setCharAt(buf.length() - 1, ')'); // replace the "commar".
+buf.setCharAt(buf.length() - 1, ')'); // replace the "comma".
 buf.append(newInclausePrefix.toString());
-
 newInclausePrefixJustAppended = true;
 
 // increment cursor for per-query IN-clause list
diff --git 
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java
 
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java
index 811a6ac..42f1ca4 100644
--- 
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java
+++