This is an automated email from the ASF dual-hosted git repository.

weitingchen pushed a commit to branch branch-1.2
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/branch-1.2 by this push:
     new 0cddc4de1 [VL] Port PR #6661 #6707 for bug fixing in rc1 (#6792)
0cddc4de1 is described below

commit 0cddc4de1893c9ca8291d83f86fcc251bc9c8203
Author: Wei-Ting Chen <[email protected]>
AuthorDate: Tue Aug 13 14:35:31 2024 +0800

    [VL] Port PR #6661 #6707 for bug fixing in rc1 (#6792)
    
    * [VL] Skip UTF-8 validation in JSON parsing (#6661)
    
    * [VL] Fix high precision rounding (#6707)
    
    ---------
    
    Co-authored-by: PHILO-HE <[email protected]>
    Co-authored-by: Arnav Balyan <[email protected]>
---
 .../gluten/execution/ScalarFunctionsValidateSuite.scala    | 14 +++++++++++---
 cpp/velox/operators/functions/Arithmetic.h                 | 11 +++++++----
 ep/build-velox/src/modify_velox.patch                      | 11 +++++++++++
 .../catalyst/expressions/GlutenMathExpressionsSuite.scala  |  3 +++
 .../catalyst/expressions/GlutenMathExpressionsSuite.scala  |  4 ++++
 .../catalyst/expressions/GlutenMathExpressionsSuite.scala  |  3 +++
 6 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index 80fd72909..7c1033db6 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -217,20 +217,28 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest {
     }
   }
 
-  test("Test get_json_object datatab function") {
+  test("get_json_object") {
     runQueryAndCompare(
       "SELECT get_json_object(string_field1, '$.a') " +
         "from datatab limit 1;") {
       checkGlutenOperatorMatch[ProjectExecTransformer]
     }
-  }
 
-  test("Test get_json_object lineitem function") {
     runQueryAndCompare(
       "SELECT l_orderkey, get_json_object('{\"a\":\"b\"}', '$.a') " +
         "from lineitem limit 1;") {
       checkGlutenOperatorMatch[ProjectExecTransformer]
     }
+
+    // Invalid UTF-8 encoding.
+    spark.sql(
+      "CREATE TABLE t USING parquet SELECT concat('{\"a\": 2, \"'," +
+        " string(X'80'), '\": 3, \"c\": 100}') AS c1")
+    withTable("t") {
+      runQueryAndCompare("SELECT get_json_object(c1, '$.c') FROM t;") {
+        checkGlutenOperatorMatch[ProjectExecTransformer]
+      }
+    }
   }
 
   ignore("json_array_length") {
diff --git a/cpp/velox/operators/functions/Arithmetic.h b/cpp/velox/operators/functions/Arithmetic.h
index 0474e1554..7b4c9ae9d 100644
--- a/cpp/velox/operators/functions/Arithmetic.h
+++ b/cpp/velox/operators/functions/Arithmetic.h
@@ -17,6 +17,7 @@
 #include <folly/CPortability.h>
 #include <stdint.h>
 #include <cmath>
+#include <limits>
 #include <type_traits>
 
 namespace gluten {
@@ -38,14 +39,16 @@ struct RoundFunction {
       return number;
     }
 
-    double factor = std::pow(10, decimals);
+    // Using long double for high precision during intermediate calculations.
+    // TODO: Make this more efficient with Boost to support high arbitrary precision at runtime.
+    long double factor = std::pow(10.0L, static_cast<long double>(decimals));
     static const TNum kInf = std::numeric_limits<TNum>::infinity();
+
     if (number < 0) {
-      return (std::round(std::nextafter(number, -kInf) * factor * -1) / factor) * -1;
+      return static_cast<TNum>((std::round(std::nextafter(number, -kInf) * factor * -1) / factor) * -1);
     }
-    return std::round(std::nextafter(number, kInf) * factor) / factor;
+    return static_cast<TNum>(std::round(std::nextafter(number, kInf) * factor) / factor);
   }
-
   template <typename TInput>
   FOLLY_ALWAYS_INLINE void call(TInput& result, const TInput& a, const int32_t b = 0) {
     result = round(a, b);
diff --git a/ep/build-velox/src/modify_velox.patch b/ep/build-velox/src/modify_velox.patch
index 1cb352a25..8e688d8c5 100644
--- a/ep/build-velox/src/modify_velox.patch
+++ b/ep/build-velox/src/modify_velox.patch
@@ -177,3 +177,14 @@ index 2cabfc29a..54329ce23 100644
  
  add_library(
    velox_dwio_arrow_parquet_writer_test_lib
+diff --git a/CMake/resolve_dependency_modules/simdjson.cmake b/CMake/resolve_dependency_modules/simdjson.cmake
+index 69e7f2044..777eb5ec1 100644
+--- a/CMake/resolve_dependency_modules/simdjson.cmake
++++ b/CMake/resolve_dependency_modules/simdjson.cmake
+@@ -29,4 +29,6 @@ FetchContent_Declare(
+   URL ${VELOX_SIMDJSON_SOURCE_URL}
+   URL_HASH ${VELOX_SIMDJSON_BUILD_SHA256_CHECKSUM})
+
++set(SIMDJSON_SKIPUTF8VALIDATION ON)
++
+ FetchContent_MakeAvailable(simdjson)
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
index 54583547d..765a64f91 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
@@ -121,6 +121,9 @@ class GlutenMathExpressionsSuite extends MathExpressionsSuite with GlutenTestsTr
     checkEvaluation(Round(-3.5, 0), -4.0)
     checkEvaluation(Round(-0.35, 1), -0.4)
     checkEvaluation(Round(-35, -1), -40)
+    checkEvaluation(Round(1.12345678901234567, 8), 1.12345679)
+    checkEvaluation(Round(-0.98765432109876543, 5), -0.98765)
+    checkEvaluation(Round(12345.67890123456789, 6), 12345.678901)
     checkEvaluation(BRound(2.5, 0), 2.0)
     checkEvaluation(BRound(3.5, 0), 4.0)
     checkEvaluation(BRound(-2.5, 0), -2.0)
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
index a60f0dce6..122f8dc06 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
@@ -249,6 +249,10 @@ class GlutenMathExpressionsSuite extends MathExpressionsSuite with GlutenTestsTr
     checkEvaluation(Round(-3.5, 0), -4.0)
     checkEvaluation(Round(-0.35, 1), -0.4)
     checkEvaluation(Round(-35, -1), -40)
+    checkEvaluation(Round(1.12345678901234567, 8), 1.12345679)
+    checkEvaluation(Round(-0.98765432109876543, 5), -0.98765)
+    checkEvaluation(Round(12345.67890123456789, 6), 12345.678901)
+    checkEvaluation(Round(-35, -1), -40)
     checkEvaluation(Round(BigDecimal("45.00"), -1), BigDecimal(50))
     checkEvaluation(BRound(2.5, 0), 2.0)
     checkEvaluation(BRound(3.5, 0), 4.0)
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
index e22092488..7308352e4 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
@@ -248,6 +248,9 @@ class GlutenMathExpressionsSuite extends MathExpressionsSuite with GlutenTestsTr
     checkEvaluation(BRound(-3.5, 0), -4.0)
     checkEvaluation(BRound(-0.35, 1), -0.4)
     checkEvaluation(BRound(-35, -1), -40)
+    checkEvaluation(Round(1.12345678901234567, 8), 1.12345679)
+    checkEvaluation(Round(-0.98765432109876543, 5), -0.98765)
+    checkEvaluation(Round(12345.67890123456789, 6), 12345.678901)
     checkEvaluation(BRound(BigDecimal("45.00"), -1), BigDecimal(40))
     checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(2.5), Literal(0))), Decimal(2))
     checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(3.5), Literal(0))), Decimal(3))


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to