(doris) branch branch-4.1 updated: branch-4.1: [fix](be) Compare JSON numeric values by value #63396 (#63488)

yiguolei Fri, 22 May 2026 06:56:50 -0700

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new 8a67e76b6a6 branch-4.1: [fix](be) Compare JSON numeric values by value #63396 (#63488)
8a67e76b6a6 is described below

commit 8a67e76b6a6e2bf51ee3545cb468df9b931819be
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri May 22 21:56:11 2026 +0800

    branch-4.1: [fix](be) Compare JSON numeric values by value #63396 (#63488)
    
    Cherry-picked from #63396
    
    Co-authored-by: Jerry Hu <[email protected]>
---
 be/src/util/jsonb_document.h                       | 247 +++++++++++++++++----
 be/test/core/jsonb/jsonb_document_test.cpp         |  72 ++++++
 .../sql/presto/scalar/TestJsonFunctions.out        |  16 +-
 .../test_json_contains_numeric_equality.out        |   3 +
 .../test_json_contains_numeric_equality.groovy     |  29 +++
 5 files changed, 312 insertions(+), 55 deletions(-)

diff --git a/be/src/util/jsonb_document.h b/be/src/util/jsonb_document.h
index cb9425744ff..ce9155de0c5 100644
--- a/be/src/util/jsonb_document.h
+++ b/be/src/util/jsonb_document.h
@@ -67,10 +67,13 @@
 #define JSONB_JSONBDOCUMENT_H
 
 #include <algorithm>
+#include <array>
 #include <cctype>
 #include <charconv>
+#include <cmath>
 #include <cstddef>
 #include <cstdint>
+#include <limits>
 #include <string>
 #include <string_view>
 #include <type_traits>
@@ -1004,6 +1007,196 @@ struct ArrayVal : public ContainerVal {
 
 namespace jsonb_detail {
 
+struct JsonbScaledDecimal {
+    wide::Int256 value;
+    uint32_t scale;
+};
+
+inline void validate_decimal_scale(uint32_t scale) {
+    if (scale > static_cast<uint32_t>(BeConsts::MAX_DECIMALV3_SCALE)) {
+        throw Exception(ErrorCode::INTERNAL_ERROR,
+                        "Invalid JSONB decimal scale: {}, max allowed scale: {}", scale,
+                        BeConsts::MAX_DECIMALV3_SCALE);
+    }
+}
+
+inline bool is_numeric(const JsonbValue* value) {
+    return value->isInt() || value->isDouble() || value->isFloat() || value->isDecimal();
+}
+
+inline double floating_value(const JsonbValue* value) {
+    if (value->isDouble()) {
+        return value->unpack<JsonbDoubleVal>()->val();
+    }
+    return value->unpack<JsonbFloatVal>()->val();
+}
+
+inline JsonbScaledDecimal get_scaled_decimal(const JsonbValue* value) {
+    switch (value->type) {
+    case JsonbType::T_Decimal32: {
+        const auto* decimal = value->unpack<JsonbDecimal32>();
+        validate_decimal_scale(decimal->scale);
+        return {wide::Int256(decimal->val()), decimal->scale};
+    }
+    case JsonbType::T_Decimal64: {
+        const auto* decimal = value->unpack<JsonbDecimal64>();
+        validate_decimal_scale(decimal->scale);
+        return {wide::Int256(decimal->val()), decimal->scale};
+    }
+    case JsonbType::T_Decimal128: {
+        const auto* decimal = value->unpack<JsonbDecimal128>();
+        validate_decimal_scale(decimal->scale);
+        return {wide::Int256(decimal->val()), decimal->scale};
+    }
+    case JsonbType::T_Decimal256: {
+        const auto* decimal = value->unpack<JsonbDecimal256>();
+        validate_decimal_scale(decimal->scale);
+        return {decimal->val(), decimal->scale};
+    }
+    default:
+        throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB decimal value type: {}",
+                        static_cast<int32_t>(value->type));
+    }
+}
+
+inline bool scaled_decimal_equal_decimal(const JsonbScaledDecimal& lhs,
+                                         const JsonbScaledDecimal& rhs) {
+    if (lhs.scale == rhs.scale) {
+        return lhs.value == rhs.value;
+    }
+
+    if (lhs.scale < rhs.scale) {
+        const auto scale_multiplier = decimal_scale_multiplier<wide::Int256>(rhs.scale - lhs.scale);
+        return rhs.value % scale_multiplier == 0 && lhs.value == rhs.value / scale_multiplier;
+    }
+
+    const auto scale_multiplier = decimal_scale_multiplier<wide::Int256>(lhs.scale - rhs.scale);
+    return lhs.value % scale_multiplier == 0 && lhs.value / scale_multiplier == rhs.value;
+}
+
+inline bool scaled_decimal_equal_integer(const JsonbScaledDecimal& decimal, int128_t integer) {
+    const auto integer_value = wide::Int256(integer);
+    if (decimal.scale == 0) {
+        return decimal.value == integer_value;
+    }
+
+    const auto scale_multiplier = decimal_scale_multiplier<wide::Int256>(decimal.scale);
+    return decimal.value % scale_multiplier == 0 &&
+           decimal.value / scale_multiplier == integer_value;
+}
+
+inline constexpr auto kPowersOfFive = [] {
+    std::array<wide::Int256, BeConsts::MAX_DECIMALV3_SCALE + 1> powers {};
+    powers[0] = 1;
+    for (size_t i = 1; i < powers.size(); ++i) {
+        powers[i] = powers[i - 1] * 5;
+    }
+    return powers;
+}();
+
+inline wide::Int256 power_of_five(uint32_t exponent) {
+    validate_decimal_scale(exponent);
+    return kPowersOfFive[exponent];
+}
+
+inline bool scaled_binary_equal(wide::Int256 value, int exponent, wide::Int256 significand) {
+    if (exponent < 0) {
+        const int divisor_exponent = -exponent;
+        if (divisor_exponent >= std::numeric_limits<int64_t>::digits) {
+            return false;
+        }
+        const auto divisor = wide::Int256(1) << divisor_exponent;
+        return significand % divisor == 0 && value == significand / divisor;
+    }
+    constexpr int max_positive_int256_shift = std::numeric_limits<wide::Int256>::digits;
+    // wide::Int256 is signed, so shifting 1 by 255 reaches the sign bit.
+    if (exponent >= max_positive_int256_shift) {
+        return false;
+    }
+    const auto multiplier = wide::Int256(1) << exponent;
+    return value % multiplier == 0 && value / multiplier == significand;
+}
+
+inline bool floating_equal_integer(const JsonbValue* floating, int128_t integer) {
+    const double value = floating_value(floating);
+    int exponent = 0;
+    std::frexp(value, &exponent);
+    if (!std::isfinite(value) || std::trunc(value) != value) {
+        return false;
+    }
+    if (exponent >= 128) {
+        return value == -std::ldexp(1.0, 127) && integer == std::numeric_limits<int128_t>::min();
+    }
+    if (exponent <= -1) {
+        return false;
+    }
+    return static_cast<int128_t>(value) == integer;
+}
+
+inline bool floating_equal_decimal(const JsonbValue* floating, const JsonbScaledDecimal& decimal) {
+    const double value = floating_value(floating);
+    if (!std::isfinite(value)) {
+        return false;
+    }
+    if (value == 0) {
+        return decimal.value == 0;
+    }
+
+    int exponent = 0;
+    const double significand_fraction = std::frexp(value, &exponent);
+    const double significand_double =
+            std::ldexp(significand_fraction, std::numeric_limits<double>::digits);
+    auto significand = wide::Int256(static_cast<int64_t>(significand_double));
+    exponent -= std::numeric_limits<double>::digits;
+
+    const auto five_multiplier = power_of_five(decimal.scale);
+    if (decimal.value % five_multiplier != 0) {
+        return false;
+    }
+    const auto binary_scaled_decimal = decimal.value / five_multiplier;
+    return scaled_binary_equal(binary_scaled_decimal, exponent + decimal.scale, significand);
+}
+
+inline bool numeric_equal(const JsonbValue* lhs, const JsonbValue* rhs) {
+    if (!is_numeric(rhs)) {
+        return false;
+    }
+
+    if ((lhs->isDouble() || lhs->isFloat()) && rhs->isInt()) {
+        return floating_equal_integer(lhs, rhs->int_val());
+    }
+
+    if ((rhs->isDouble() || rhs->isFloat()) && lhs->isInt()) {
+        return floating_equal_integer(rhs, lhs->int_val());
+    }
+
+    if ((lhs->isDouble() || lhs->isFloat()) && rhs->isDecimal()) {
+        return floating_equal_decimal(lhs, get_scaled_decimal(rhs));
+    }
+
+    if ((rhs->isDouble() || rhs->isFloat()) && lhs->isDecimal()) {
+        return floating_equal_decimal(rhs, get_scaled_decimal(lhs));
+    }
+
+    if (lhs->isDouble() || lhs->isFloat()) {
+        return (rhs->isDouble() || rhs->isFloat()) && floating_value(lhs) == floating_value(rhs);
+    }
+
+    if (lhs->isDecimal()) {
+        const auto lhs_decimal = get_scaled_decimal(lhs);
+        if (rhs->isDecimal()) {
+            return scaled_decimal_equal_decimal(lhs_decimal, get_scaled_decimal(rhs));
+        }
+        return scaled_decimal_equal_integer(lhs_decimal, rhs->int_val());
+    }
+
+    if (rhs->isDecimal()) {
+        return scaled_decimal_equal_integer(get_scaled_decimal(rhs), lhs->int_val());
+    }
+
+    return lhs->int_val() == rhs->int_val();
+}
+
 inline bool array_contains_value(const ArrayVal* target_array, const JsonbValue* candidate) {
     const int target_num = target_array->numElem();
     for (int i = 0; i < target_num; ++i) {
@@ -1152,18 +1345,14 @@ inline bool JsonbValue::contains(const JsonbValue* rhs) const {
     case JsonbType::T_Int16:
     case JsonbType::T_Int32:
     case JsonbType::T_Int64:
-    case JsonbType::T_Int128: {
-        return rhs->isInt() && this->int_val() == rhs->int_val();
-    }
+    case JsonbType::T_Int128:
     case JsonbType::T_Double:
-    case JsonbType::T_Float: {
-        if (!rhs->isDouble() && !rhs->isFloat()) {
-            return false;
-        }
-        double left = isDouble() ? unpack<JsonbDoubleVal>()->val() : unpack<JsonbFloatVal>()->val();
-        double right = rhs->isDouble() ? rhs->unpack<JsonbDoubleVal>()->val()
-                                       : rhs->unpack<JsonbFloatVal>()->val();
-        return left == right;
+    case JsonbType::T_Float:
+    case JsonbType::T_Decimal32:
+    case JsonbType::T_Decimal64:
+    case JsonbType::T_Decimal128:
+    case JsonbType::T_Decimal256: {
+        return jsonb_detail::numeric_equal(this, rhs);
     }
     case JsonbType::T_String:
     case JsonbType::T_Binary: {
@@ -1206,42 +1395,6 @@ inline bool JsonbValue::contains(const JsonbValue* rhs) const {
     case JsonbType::T_False: {
         return rhs->isFalse();
     }
-    case JsonbType::T_Decimal32: {
-        if (rhs->isDecimal32()) {
-            return unpack<JsonbDecimal32>()->val() == rhs->unpack<JsonbDecimal32>()->val() &&
-                   unpack<JsonbDecimal32>()->precision ==
-                           rhs->unpack<JsonbDecimal32>()->precision &&
-                   unpack<JsonbDecimal32>()->scale == rhs->unpack<JsonbDecimal32>()->scale;
-        }
-        return false;
-    }
-    case JsonbType::T_Decimal64: {
-        if (rhs->isDecimal64()) {
-            return unpack<JsonbDecimal64>()->val() == rhs->unpack<JsonbDecimal64>()->val() &&
-                   unpack<JsonbDecimal64>()->precision ==
-                           rhs->unpack<JsonbDecimal64>()->precision &&
-                   unpack<JsonbDecimal64>()->scale == rhs->unpack<JsonbDecimal64>()->scale;
-        }
-        return false;
-    }
-    case JsonbType::T_Decimal128: {
-        if (rhs->isDecimal128()) {
-            return unpack<JsonbDecimal128>()->val() == rhs->unpack<JsonbDecimal128>()->val() &&
-                   unpack<JsonbDecimal128>()->precision ==
-                           rhs->unpack<JsonbDecimal128>()->precision &&
-                   unpack<JsonbDecimal128>()->scale == rhs->unpack<JsonbDecimal128>()->scale;
-        }
-        return false;
-    }
-    case JsonbType::T_Decimal256: {
-        if (rhs->isDecimal256()) {
-            return unpack<JsonbDecimal256>()->val() == rhs->unpack<JsonbDecimal256>()->val() &&
-                   unpack<JsonbDecimal256>()->precision ==
-                           rhs->unpack<JsonbDecimal256>()->precision &&
-                   unpack<JsonbDecimal256>()->scale == rhs->unpack<JsonbDecimal256>()->scale;
-        }
-        return false;
-    }
     case JsonbType::NUM_TYPES:
         break;
     }
diff --git a/be/test/core/jsonb/jsonb_document_test.cpp b/be/test/core/jsonb/jsonb_document_test.cpp
index c189afb025a..a26f1f87e72 100644
--- a/be/test/core/jsonb/jsonb_document_test.cpp
+++ b/be/test/core/jsonb/jsonb_document_test.cpp
@@ -19,6 +19,7 @@
 
 #include <gtest/gtest.h>
 
+#include <cmath>
 #include <cstdint>
 #include <limits>
 #include <string>
@@ -37,6 +38,10 @@ protected:
     void TearDown() override {}
 };
 
+static const JsonbValue* jsonb_value(JsonbWriter& writer) {
+    return writer.getDocument()->getValue();
+}
+
 TEST_F(JsonbDocumentTest, writer) {
     JsonbWriter writer;
     writer.writeStartObject();
@@ -295,4 +300,71 @@ TEST_F(JsonbDocumentTest, invaild_jsonb_document) {
     EXPECT_EQ(json_null, json_string);
 }
 
+TEST_F(JsonbDocumentTest, contains_numeric_equality) {
+    JsonbWriter int_writer;
+    ASSERT_TRUE(int_writer.writeInt(1));
+
+    JsonbWriter double_writer;
+    ASSERT_TRUE(double_writer.writeDouble(1.0));
+
+    JsonbWriter non_integer_double_writer;
+    ASSERT_TRUE(non_integer_double_writer.writeDouble(1.5));
+
+    JsonbWriter decimal32_writer;
+    ASSERT_TRUE(decimal32_writer.writeDecimal(Decimal32(int32_t(100)), 3, 2));
+
+    JsonbWriter decimal32_scale1_writer;
+    ASSERT_TRUE(decimal32_scale1_writer.writeDecimal(Decimal32(int32_t(10)), 2, 1));
+
+    JsonbWriter decimal64_writer;
+    ASSERT_TRUE(decimal64_writer.writeDecimal(Decimal64(int64_t(1000)), 4, 3));
+
+    JsonbWriter decimal128_non_integer_writer;
+    ASSERT_TRUE(decimal128_non_integer_writer.writeDecimal(Decimal128V3(int128_t(150)), 3, 2));
+
+    JsonbWriter large_int128_writer;
+    ASSERT_TRUE(large_int128_writer.writeInt128((int128_t(1) << 100) + 1));
+
+    JsonbWriter large_decimal128_writer;
+    ASSERT_TRUE(
+            large_decimal128_writer.writeDecimal(Decimal128V3((int128_t(1) << 100) + 1), 31, 0));
+
+    JsonbWriter large_double_writer;
+    ASSERT_TRUE(large_double_writer.writeDouble(std::ldexp(1.0, 100)));
+
+    EXPECT_TRUE(jsonb_value(int_writer)->contains(jsonb_value(double_writer)));
+    EXPECT_TRUE(jsonb_value(double_writer)->contains(jsonb_value(int_writer)));
+    EXPECT_FALSE(jsonb_value(int_writer)->contains(jsonb_value(non_integer_double_writer)));
+    EXPECT_FALSE(jsonb_value(large_int128_writer)->contains(jsonb_value(large_double_writer)));
+    EXPECT_FALSE(jsonb_value(large_decimal128_writer)->contains(jsonb_value(large_double_writer)));
+    EXPECT_FALSE(jsonb_value(large_double_writer)->contains(jsonb_value(large_decimal128_writer)));
+
+    EXPECT_TRUE(jsonb_value(decimal32_writer)->contains(jsonb_value(decimal32_scale1_writer)));
+    EXPECT_TRUE(jsonb_value(decimal32_scale1_writer)->contains(jsonb_value(decimal32_writer)));
+    EXPECT_TRUE(jsonb_value(decimal32_writer)->contains(jsonb_value(decimal64_writer)));
+    EXPECT_TRUE(jsonb_value(decimal64_writer)->contains(jsonb_value(decimal32_writer)));
+    EXPECT_TRUE(jsonb_value(decimal32_writer)->contains(jsonb_value(int_writer)));
+    EXPECT_TRUE(jsonb_value(int_writer)->contains(jsonb_value(decimal32_writer)));
+    EXPECT_TRUE(jsonb_value(decimal32_writer)->contains(jsonb_value(double_writer)));
+    EXPECT_TRUE(jsonb_value(double_writer)->contains(jsonb_value(decimal32_writer)));
+    EXPECT_FALSE(jsonb_value(decimal128_non_integer_writer)->contains(jsonb_value(int_writer)));
+    EXPECT_FALSE(jsonb_value(int_writer)->contains(jsonb_value(decimal128_non_integer_writer)));
+    EXPECT_FALSE(jsonb_value(decimal128_non_integer_writer)->contains(jsonb_value(double_writer)));
+    EXPECT_FALSE(jsonb_value(double_writer)->contains(jsonb_value(decimal128_non_integer_writer)));
+}
+
+TEST_F(JsonbDocumentTest, contains_invalid_decimal_scale) {
+    JsonbWriter invalid_decimal_writer;
+    ASSERT_TRUE(invalid_decimal_writer.writeDecimal(
+            Decimal32(int32_t(100)), 2, static_cast<uint32_t>(BeConsts::MAX_DECIMALV3_SCALE) + 1));
+
+    JsonbWriter double_writer;
+    ASSERT_TRUE(double_writer.writeDouble(1.0));
+
+    EXPECT_THROW(jsonb_value(invalid_decimal_writer)->contains(jsonb_value(double_writer)),
+                 Exception);
+    EXPECT_THROW(jsonb_value(double_writer)->contains(jsonb_value(invalid_decimal_writer)),
+                 Exception);
+}
+
 } // namespace doris
diff --git a/regression-test/data/external_table_p0/dialect_compatible/sql/presto/scalar/TestJsonFunctions.out b/regression-test/data/external_table_p0/dialect_compatible/sql/presto/scalar/TestJsonFunctions.out
index b34789e4d73..ec516837c55 100644
--- a/regression-test/data/external_table_p0/dialect_compatible/sql/presto/scalar/TestJsonFunctions.out
+++ b/regression-test/data/external_table_p0/dialect_compatible/sql/presto/scalar/TestJsonFunctions.out
@@ -54,7 +54,7 @@ true
 true
 
 -- !TestJsonFunctions_19 --
-false
+true
 
 -- !TestJsonFunctions_20 --
 \N
@@ -72,7 +72,7 @@ true
 true
 
 -- !TestJsonFunctions_25 --
-false
+true
 
 -- !TestJsonFunctions_26 --
 \N
@@ -93,7 +93,7 @@ true
 true
 
 -- !TestJsonFunctions_32 --
-false
+true
 
 -- !TestJsonFunctions_33 --
 \N
@@ -114,7 +114,7 @@ true
 true
 
 -- !TestJsonFunctions_39 --
-false
+true
 
 -- !TestJsonFunctions_40 --
 \N
@@ -381,7 +381,7 @@ true
 true
 
 -- !TestJsonFunctions_128 --
-false
+true
 
 -- !TestJsonFunctions_129 --
 \N
@@ -399,7 +399,7 @@ true
 true
 
 -- !TestJsonFunctions_134 --
-false
+true
 
 -- !TestJsonFunctions_135 --
 \N
@@ -420,7 +420,7 @@ true
 true
 
 -- !TestJsonFunctions_141 --
-false
+true
 
 -- !TestJsonFunctions_142 --
 \N
@@ -438,7 +438,7 @@ true
 true
 
 -- !TestJsonFunctions_147 --
-false
+true
 
 -- !TestJsonFunctions_148 --
 \N
diff --git a/regression-test/data/query_p0/sql_functions/json_functions/test_json_contains_numeric_equality.out b/regression-test/data/query_p0/sql_functions/json_functions/test_json_contains_numeric_equality.out
new file mode 100644
index 00000000000..0b2cf72b233
--- /dev/null
+++ b/regression-test/data/query_p0/sql_functions/json_functions/test_json_contains_numeric_equality.out
@@ -0,0 +1,3 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !json_contains_numeric_equality --
+true   true    true    true    true    true    false
diff --git a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_contains_numeric_equality.groovy b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_contains_numeric_equality.groovy
new file mode 100644
index 00000000000..4e38b9601c8
--- /dev/null
+++ b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_contains_numeric_equality.groovy
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_json_contains_numeric_equality", "p0") {
+    order_qt_json_contains_numeric_equality """
+        SELECT JSON_CONTAINS('1', '1.0'),
+               JSON_CONTAINS('1.0', '1'),
+               JSON_CONTAINS('[1]', '1.0'),
+               JSON_CONTAINS('[1.0]', '1'),
+               JSON_CONTAINS('{"qty": 1}', '{"qty": 1.0}'),
+               JSON_CONTAINS('{"qty": 1.0}', '{"qty": 1}'),
+               JSON_CONTAINS('{"qty": 1}', '{"qty": 1.5}')
+        ORDER BY 1;
+    """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) branch branch-4.1 updated: branch-4.1: [fix](be) Compare JSON numeric values by value #63396 (#63488)

Reply via email to