[hive] branch master updated: HIVE-25653: Incorrect results returned by STDDEV, STDDEV_SAMP, STDDEV_POP for floating point data types (Ashish Sharma, reviewed by Adesh Rao, Sankar Hariappan)
This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new d0f77cc HIVE-25653: Incorrect results returned by STDDEV, STDDEV_SAMP, STDDEV_POP for floating point data types (Ashish Sharma, reviewed by Adesh Rao, Sankar Hariappan) d0f77cc is described below commit d0f77cca1a6612894837a174440a5fd929cd3bcb Author: Ashish Kumar Sharma AuthorDate: Mon Nov 8 12:23:55 2021 +0530 HIVE-25653: Incorrect results returned by STDDEV, STDDEV_SAMP, STDDEV_POP for floating point data types (Ashish Sharma, reviewed by Adesh Rao, Sankar Hariappan) Signed-off-by: Sankar Hariappan Closes (#2760) --- .../hadoop/hive/ql/udf/generic/GenericUDAFStd.java | 8 +- .../hive/ql/udf/generic/GenericUDAFVariance.java | 29 -- ql/src/test/queries/clientpositive/stddev.q| 14 +++ .../clientpositive/llap/cbo_rp_windowing_2.q.out | 42 - .../test/results/clientpositive/llap/stddev.q.out | 102 + .../clientpositive/llap/vector_windowing.q.out | 42 - .../results/clientpositive/llap/windowing.q.out| 42 - 7 files changed, 205 insertions(+), 74 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java index 79b519c..729455c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java @@ -27,6 +27,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import java.math.BigDecimal; +import java.math.MathContext; + /** * Compute the standard deviation by extending GenericUDAFVariance and * overriding the terminate() method of the evaluator. 
@@ -90,7 +93,10 @@ public class GenericUDAFStd extends GenericUDAFVariance { * use it, etc. */ public static double calculateStdResult(double variance, long count) { - return Math.sqrt(variance / count); + // TODO: BigDecimal.sqrt() is introduced in java 9. So change the below calculation once hive upgraded to java 9 or above. + BigDecimal bvariance = new BigDecimal(variance); + BigDecimal result = bvariance.divide(new BigDecimal(count), MathContext.DECIMAL128); + return Math.sqrt(result.doubleValue()); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java index bb55d88..5e60edc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.udf.generic; +import java.math.BigDecimal; +import java.math.MathContext; import java.util.ArrayList; import java.util.HashMap; import java.util.Map; @@ -106,9 +108,14 @@ public class GenericUDAFVariance extends AbstractGenericUDAFResolver { */ public static double calculateIntermediate( long count, double sum, double value, double variance) { -double t = count * value - sum; -variance += (t * t) / ((double) count * (count - 1)); -return variance; +BigDecimal bcount,bsum,bvalue,bvariance; +bvariance = new BigDecimal(variance); +bsum = new BigDecimal(sum); +bvalue = new BigDecimal(value); +bcount = new BigDecimal(count); +BigDecimal t = bcount.multiply(bvalue).subtract(bsum); +bvariance = bvariance.add(t.multiply(t).divide(bcount.multiply(bcount.subtract(BigDecimal.ONE)),MathContext.DECIMAL128)); +return bvariance.doubleValue(); } /* @@ -120,14 +127,16 @@ public class GenericUDAFVariance extends AbstractGenericUDAFResolver { long partialCount, long mergeCount, double partialSum, double mergeSum, double partialVariance, double mergeVariance) { -final 
double doublePartialCount = (double) partialCount; -final double doubleMergeCount = (double) mergeCount; +final BigDecimal bPartialCount = new BigDecimal(partialCount); +final BigDecimal bMergeCount = new BigDecimal(mergeCount); +BigDecimal bmergeVariance = new BigDecimal(mergeVariance); -double t = (doublePartialCount / doubleMergeCount) * mergeSum - partialSum; -mergeVariance += -partialVariance + ((doubleMergeCount / doublePartialCount) / -(doubleMergeCount + doublePartialCount)) * t * t; -return mergeVariance; +BigDecimal t = +bPartialCount.divide(bMergeCount, MathContext.DECIMAL128).multiply(new BigDecimal(mergeSum)).subtract(new BigDecimal(partialSum)); + +bmergeVariance =
[hive] branch master updated: HIVE-25659: Metastore direct sql queries with IN/(NOT IN) should be split based on max parameters allowed by SQL DB (Nikhil Gupta, reviewed by Adesh Rao, Sankar Hariappan)
This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new aa7a903 HIVE-25659: Metastore direct sql queries with IN/(NOT IN) should be split based on max parameters allowed by SQL DB (Nikhil Gupta, reviewed by Adesh Rao, Sankar Hariappan) aa7a903 is described below commit aa7a9030ee4d457dd6da45db63a12ce7d972362a Author: guptanikhil007 AuthorDate: Mon Nov 8 11:21:35 2021 +0530 HIVE-25659: Metastore direct sql queries with IN/(NOT IN) should be split based on max parameters allowed by SQL DB (Nikhil Gupta, reviewed by Adesh Rao, Sankar Hariappan) Signed-off-by: Sankar Hariappan Closes (#2758) --- .../hadoop/hive/metastore/conf/MetastoreConf.java | 3 +++ .../apache/hadoop/hive/metastore/txn/TxnUtils.java | 6 ++--- .../hadoop/hive/metastore/txn/TestTxnUtils.java| 29 +++--- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 0e05ad3..21ea1f8 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -680,6 +680,9 @@ public class MetastoreConf { DIRECT_SQL_MAX_ELEMENTS_VALUES_CLAUSE("metastore.direct.sql.max.elements.values.clause", "hive.direct.sql.max.elements.values.clause", 1000, "The maximum number of values in a VALUES clause for INSERT statement."), +DIRECT_SQL_MAX_PARAMETERS("metastore.direct.sql.max.parameters", +"hive.direct.sql.max.parameters", 1000, "The maximum query parameters \n" + +"backend sql engine can support."), 
DIRECT_SQL_MAX_QUERY_LENGTH("metastore.direct.sql.max.query.length", "hive.direct.sql.max.query.length", 100, "The maximum\n" + " size of a query string (in KB)."), diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java index f2c881a..13d45d1 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnUtils.java @@ -265,6 +265,7 @@ public class TxnUtils { // Get configuration parameters int maxQueryLength = MetastoreConf.getIntVar(conf, ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH); int batchSize = MetastoreConf.getIntVar(conf, ConfVars.DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE); +int maxParameters = MetastoreConf.getIntVar(conf, ConfVars.DIRECT_SQL_MAX_PARAMETERS); // Check parameter set validity as a public method. if (inList == null || inList.size() == 0 || maxQueryLength <= 0 || batchSize <= 0) { @@ -316,7 +317,7 @@ public class TxnUtils { // Compute the size of a query when the 'nextValue' is added to the current query. int querySize = querySizeExpected(buf.length(), nextValue.length(), suffix.length(), addParens); - if (querySize > maxQueryLength * 1024) { + if ((querySize > maxQueryLength * 1024) || (currentCount >= maxParameters)) { // Check an edge case where the DIRECT_SQL_MAX_QUERY_LENGTH does not allow one 'IN' clause with single value. 
if (cursor4queryOfInClauses == 1 && cursor4InClauseElements == 0) { throw new IllegalArgumentException("The current " + ConfVars.DIRECT_SQL_MAX_QUERY_LENGTH.getVarname() + " is set too small to have one IN clause with single value!"); @@ -351,9 +352,8 @@ public class TxnUtils { continue; } else if (cursor4InClauseElements >= batchSize-1 && cursor4InClauseElements != 0) { // Finish the current 'IN'/'NOT IN' clause and start a new clause. -buf.setCharAt(buf.length() - 1, ')'); // replace the "commar". +buf.setCharAt(buf.length() - 1, ')'); // replace the "comma". buf.append(newInclausePrefix.toString()); - newInclausePrefixJustAppended = true; // increment cursor for per-query IN-clause list diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java index 811a6ac..42f1ca4 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/txn/TestTxnUtils.java +++