This is an automated email from the ASF dual-hosted git repository.
weitingchen pushed a commit to branch branch-1.2
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/branch-1.2 by this push:
new 0cddc4de1 [VL] Port PR #6661 #6707 for bug fixing in rc1 (#6792)
0cddc4de1 is described below
commit 0cddc4de1893c9ca8291d83f86fcc251bc9c8203
Author: Wei-Ting Chen <[email protected]>
AuthorDate: Tue Aug 13 14:35:31 2024 +0800
[VL] Port PR #6661 #6707 for bug fixing in rc1 (#6792)
* [VL] Skip UTF-8 validation in JSON parsing (#6661)
* [VL] Fix high precision rounding (#6707)
---------
Co-authored-by: PHILO-HE <[email protected]>
Co-authored-by: Arnav Balyan <[email protected]>
---
.../gluten/execution/ScalarFunctionsValidateSuite.scala | 14 +++++++++++---
cpp/velox/operators/functions/Arithmetic.h | 11 +++++++----
ep/build-velox/src/modify_velox.patch | 11 +++++++++++
.../catalyst/expressions/GlutenMathExpressionsSuite.scala | 3 +++
.../catalyst/expressions/GlutenMathExpressionsSuite.scala | 4 ++++
.../catalyst/expressions/GlutenMathExpressionsSuite.scala | 3 +++
6 files changed, 39 insertions(+), 7 deletions(-)
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index 80fd72909..7c1033db6 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -217,20 +217,28 @@ class ScalarFunctionsValidateSuite extends
FunctionsValidateTest {
}
}
- test("Test get_json_object datatab function") {
+ test("get_json_object") {
runQueryAndCompare(
"SELECT get_json_object(string_field1, '$.a') " +
"from datatab limit 1;") {
checkGlutenOperatorMatch[ProjectExecTransformer]
}
- }
- test("Test get_json_object lineitem function") {
runQueryAndCompare(
"SELECT l_orderkey, get_json_object('{\"a\":\"b\"}', '$.a') " +
"from lineitem limit 1;") {
checkGlutenOperatorMatch[ProjectExecTransformer]
}
+
+ // Invalid UTF-8 encoding.
+ spark.sql(
+ "CREATE TABLE t USING parquet SELECT concat('{\"a\": 2, \"'," +
+ " string(X'80'), '\": 3, \"c\": 100}') AS c1")
+ withTable("t") {
+ runQueryAndCompare("SELECT get_json_object(c1, '$.c') FROM t;") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ }
}
ignore("json_array_length") {
diff --git a/cpp/velox/operators/functions/Arithmetic.h
b/cpp/velox/operators/functions/Arithmetic.h
index 0474e1554..7b4c9ae9d 100644
--- a/cpp/velox/operators/functions/Arithmetic.h
+++ b/cpp/velox/operators/functions/Arithmetic.h
@@ -17,6 +17,7 @@
#include <folly/CPortability.h>
#include <stdint.h>
#include <cmath>
+#include <limits>
#include <type_traits>
namespace gluten {
@@ -38,14 +39,16 @@ struct RoundFunction {
return number;
}
- double factor = std::pow(10, decimals);
+ // Using long double for high precision during intermediate calculations.
+ // TODO: Make this more efficient with Boost to support high arbitrary precision at runtime.
+ long double factor = std::pow(10.0L, static_cast<long double>(decimals));
static const TNum kInf = std::numeric_limits<TNum>::infinity();
+
if (number < 0) {
- return (std::round(std::nextafter(number, -kInf) * factor * -1) / factor) * -1;
+ return static_cast<TNum>((std::round(std::nextafter(number, -kInf) * factor * -1) / factor) * -1);
}
- return std::round(std::nextafter(number, kInf) * factor) / factor;
+ return static_cast<TNum>(std::round(std::nextafter(number, kInf) * factor) / factor);
}
-
template <typename TInput>
FOLLY_ALWAYS_INLINE void call(TInput& result, const TInput& a, const int32_t b = 0) {
result = round(a, b);
diff --git a/ep/build-velox/src/modify_velox.patch
b/ep/build-velox/src/modify_velox.patch
index 1cb352a25..8e688d8c5 100644
--- a/ep/build-velox/src/modify_velox.patch
+++ b/ep/build-velox/src/modify_velox.patch
@@ -177,3 +177,14 @@ index 2cabfc29a..54329ce23 100644
add_library(
velox_dwio_arrow_parquet_writer_test_lib
+diff --git a/CMake/resolve_dependency_modules/simdjson.cmake
b/CMake/resolve_dependency_modules/simdjson.cmake
+index 69e7f2044..777eb5ec1 100644
+--- a/CMake/resolve_dependency_modules/simdjson.cmake
++++ b/CMake/resolve_dependency_modules/simdjson.cmake
+@@ -29,4 +29,6 @@ FetchContent_Declare(
+ URL ${VELOX_SIMDJSON_SOURCE_URL}
+ URL_HASH ${VELOX_SIMDJSON_BUILD_SHA256_CHECKSUM})
+
++set(SIMDJSON_SKIPUTF8VALIDATION ON)
++
+ FetchContent_MakeAvailable(simdjson)
diff --git
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
index 54583547d..765a64f91 100644
---
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
+++
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
@@ -121,6 +121,9 @@ class GlutenMathExpressionsSuite extends
MathExpressionsSuite with GlutenTestsTr
checkEvaluation(Round(-3.5, 0), -4.0)
checkEvaluation(Round(-0.35, 1), -0.4)
checkEvaluation(Round(-35, -1), -40)
+ checkEvaluation(Round(1.12345678901234567, 8), 1.12345679)
+ checkEvaluation(Round(-0.98765432109876543, 5), -0.98765)
+ checkEvaluation(Round(12345.67890123456789, 6), 12345.678901)
checkEvaluation(BRound(2.5, 0), 2.0)
checkEvaluation(BRound(3.5, 0), 4.0)
checkEvaluation(BRound(-2.5, 0), -2.0)
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
index a60f0dce6..122f8dc06 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
@@ -249,6 +249,10 @@ class GlutenMathExpressionsSuite extends
MathExpressionsSuite with GlutenTestsTr
checkEvaluation(Round(-3.5, 0), -4.0)
checkEvaluation(Round(-0.35, 1), -0.4)
checkEvaluation(Round(-35, -1), -40)
+ checkEvaluation(Round(1.12345678901234567, 8), 1.12345679)
+ checkEvaluation(Round(-0.98765432109876543, 5), -0.98765)
+ checkEvaluation(Round(12345.67890123456789, 6), 12345.678901)
+ checkEvaluation(Round(-35, -1), -40)
checkEvaluation(Round(BigDecimal("45.00"), -1), BigDecimal(50))
checkEvaluation(BRound(2.5, 0), 2.0)
checkEvaluation(BRound(3.5, 0), 4.0)
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
index e22092488..7308352e4 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenMathExpressionsSuite.scala
@@ -248,6 +248,9 @@ class GlutenMathExpressionsSuite extends
MathExpressionsSuite with GlutenTestsTr
checkEvaluation(BRound(-3.5, 0), -4.0)
checkEvaluation(BRound(-0.35, 1), -0.4)
checkEvaluation(BRound(-35, -1), -40)
+ checkEvaluation(Round(1.12345678901234567, 8), 1.12345679)
+ checkEvaluation(Round(-0.98765432109876543, 5), -0.98765)
+ checkEvaluation(Round(12345.67890123456789, 6), 12345.678901)
checkEvaluation(BRound(BigDecimal("45.00"), -1), BigDecimal(40))
checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(2.5),
Literal(0))), Decimal(2))
checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(3.5),
Literal(0))), Decimal(3))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]