This is an automated email from the ASF dual-hosted git repository.
mrhhsg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new fb38a5df891 [fix](be) Compare JSON numeric values by value (#63396)
fb38a5df891 is described below
commit fb38a5df89172330a08cf5250df6fb41a68b7332
Author: Jerry Hu <[email protected]>
AuthorDate: Thu May 21 18:19:10 2026 +0800
[fix](be) Compare JSON numeric values by value (#63396)
### What problem does this PR solve?
Issue Number: None
Related PR: None
Problem Summary: json_contains compared JSONB numeric values by their
stored binary categories. As a result, semantically equal numbers such
as integer 1 and floating-point 1.0, or decimals with different scales,
were not treated as contained. This changes JsonbValue::contains to
compare integer, floating-point, and decimal JSONB values by numeric
value, while keeping exact integer and decimal comparisons to avoid
precision-related false matches.
### Release note
Fix json_contains to match semantically equal JSON numeric values across
numeric storage categories.
### Check List (For Author)
- Test:
- Build: ./build.sh --be --fe
- Unit Test: ./run-be-ut.sh --run
--filter=JsonbDocumentTest.contains_numeric_equality
- Regression test: ./run-regression-test.sh --conf
output/local-regression/regression-conf-29000.groovy --run -d
query_p0/sql_functions/json_functions -s
test_json_contains_numeric_equality -forceGenOut
- Regression test: ./run-regression-test.sh --conf
output/local-regression/regression-conf-29000.groovy --run -d
query_p0/sql_functions/json_functions -s
test_json_contains_numeric_equality
- Style: build-support/check-format.sh
- Style: git diff --check
- Static analysis: build-support/run-clang-tidy.sh --build-dir
be/build_Release (attempted; failed on pre-existing header/system
include diagnostics unrelated to the changed lines)
- Behavior changed: Yes (json_contains now treats semantically equal
JSON numeric values across integer, floating-point, and decimal JSONB
representations as contained)
- Does this need documentation: No
---
be/src/util/jsonb_document.h | 247 +++++++++++++++++----
be/test/core/jsonb/jsonb_document_test.cpp | 72 ++++++
.../sql/presto/scalar/TestJsonFunctions.out | 16 +-
.../test_json_contains_numeric_equality.out | 3 +
.../test_json_contains_numeric_equality.groovy | 29 +++
5 files changed, 312 insertions(+), 55 deletions(-)
diff --git a/be/src/util/jsonb_document.h b/be/src/util/jsonb_document.h
index c141d67c79a..0300c5935f6 100644
--- a/be/src/util/jsonb_document.h
+++ b/be/src/util/jsonb_document.h
@@ -67,10 +67,13 @@
#define JSONB_JSONBDOCUMENT_H
#include <algorithm>
+#include <array>
#include <cctype>
#include <charconv>
+#include <cmath>
#include <cstddef>
#include <cstdint>
+#include <limits>
#include <string>
#include <string_view>
#include <type_traits>
@@ -1004,6 +1007,196 @@ struct ArrayVal : public ContainerVal {
namespace jsonb_detail {
+struct JsonbScaledDecimal {
+ wide::Int256 value;
+ uint32_t scale;
+};
+
+inline void validate_decimal_scale(uint32_t scale) {
+ if (scale > static_cast<uint32_t>(BeConsts::MAX_DECIMALV3_SCALE)) {
+ throw Exception(ErrorCode::INTERNAL_ERROR,
+ "Invalid JSONB decimal scale: {}, max allowed scale: {}", scale,
+ BeConsts::MAX_DECIMALV3_SCALE);
+ }
+}
+
+inline bool is_numeric(const JsonbValue* value) {
+ return value->isInt() || value->isDouble() || value->isFloat() || value->isDecimal();
+}
+
+inline double floating_value(const JsonbValue* value) {
+ if (value->isDouble()) {
+ return value->unpack<JsonbDoubleVal>()->val();
+ }
+ return value->unpack<JsonbFloatVal>()->val();
+}
+
+inline JsonbScaledDecimal get_scaled_decimal(const JsonbValue* value) {
+ switch (value->type) {
+ case JsonbType::T_Decimal32: {
+ const auto* decimal = value->unpack<JsonbDecimal32>();
+ validate_decimal_scale(decimal->scale);
+ return {wide::Int256(decimal->val()), decimal->scale};
+ }
+ case JsonbType::T_Decimal64: {
+ const auto* decimal = value->unpack<JsonbDecimal64>();
+ validate_decimal_scale(decimal->scale);
+ return {wide::Int256(decimal->val()), decimal->scale};
+ }
+ case JsonbType::T_Decimal128: {
+ const auto* decimal = value->unpack<JsonbDecimal128>();
+ validate_decimal_scale(decimal->scale);
+ return {wide::Int256(decimal->val()), decimal->scale};
+ }
+ case JsonbType::T_Decimal256: {
+ const auto* decimal = value->unpack<JsonbDecimal256>();
+ validate_decimal_scale(decimal->scale);
+ return {decimal->val(), decimal->scale};
+ }
+ default:
+ throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB decimal value type: {}",
+ static_cast<int32_t>(value->type));
+ }
+}
+
+inline bool scaled_decimal_equal_decimal(const JsonbScaledDecimal& lhs,
+ const JsonbScaledDecimal& rhs) {
+ if (lhs.scale == rhs.scale) {
+ return lhs.value == rhs.value;
+ }
+
+ if (lhs.scale < rhs.scale) {
+ const auto scale_multiplier = decimal_scale_multiplier<wide::Int256>(rhs.scale - lhs.scale);
+ return rhs.value % scale_multiplier == 0 && lhs.value == rhs.value / scale_multiplier;
+ }
+
+ const auto scale_multiplier = decimal_scale_multiplier<wide::Int256>(lhs.scale - rhs.scale);
+ return lhs.value % scale_multiplier == 0 && lhs.value / scale_multiplier == rhs.value;
+}
+
+inline bool scaled_decimal_equal_integer(const JsonbScaledDecimal& decimal, int128_t integer) {
+ const auto integer_value = wide::Int256(integer);
+ if (decimal.scale == 0) {
+ return decimal.value == integer_value;
+ }
+
+ const auto scale_multiplier = decimal_scale_multiplier<wide::Int256>(decimal.scale);
+ return decimal.value % scale_multiplier == 0 &&
+ decimal.value / scale_multiplier == integer_value;
+}
+
+inline constexpr auto kPowersOfFive = [] {
+ std::array<wide::Int256, BeConsts::MAX_DECIMALV3_SCALE + 1> powers {};
+ powers[0] = 1;
+ for (size_t i = 1; i < powers.size(); ++i) {
+ powers[i] = powers[i - 1] * 5;
+ }
+ return powers;
+}();
+
+inline wide::Int256 power_of_five(uint32_t exponent) {
+ validate_decimal_scale(exponent);
+ return kPowersOfFive[exponent];
+}
+
+inline bool scaled_binary_equal(wide::Int256 value, int exponent, wide::Int256 significand) {
+ if (exponent < 0) {
+ const int divisor_exponent = -exponent;
+ if (divisor_exponent >= std::numeric_limits<int64_t>::digits) {
+ return false;
+ }
+ const auto divisor = wide::Int256(1) << divisor_exponent;
+ return significand % divisor == 0 && value == significand / divisor;
+ }
+ constexpr int max_positive_int256_shift = std::numeric_limits<wide::Int256>::digits;
+ // wide::Int256 is signed, so shifting 1 by 255 reaches the sign bit.
+ if (exponent >= max_positive_int256_shift) {
+ return false;
+ }
+ const auto multiplier = wide::Int256(1) << exponent;
+ return value % multiplier == 0 && value / multiplier == significand;
+}
+
+inline bool floating_equal_integer(const JsonbValue* floating, int128_t integer) {
+ const double value = floating_value(floating);
+ int exponent = 0;
+ std::frexp(value, &exponent);
+ if (!std::isfinite(value) || std::trunc(value) != value) {
+ return false;
+ }
+ if (exponent >= 128) {
+ return value == -std::ldexp(1.0, 127) && integer == std::numeric_limits<int128_t>::min();
+ }
+ if (exponent <= -1) {
+ return false;
+ }
+ return static_cast<int128_t>(value) == integer;
+}
+
+inline bool floating_equal_decimal(const JsonbValue* floating, const JsonbScaledDecimal& decimal) {
+ const double value = floating_value(floating);
+ if (!std::isfinite(value)) {
+ return false;
+ }
+ if (value == 0) {
+ return decimal.value == 0;
+ }
+
+ int exponent = 0;
+ const double significand_fraction = std::frexp(value, &exponent);
+ const double significand_double =
+ std::ldexp(significand_fraction, std::numeric_limits<double>::digits);
+ auto significand = wide::Int256(static_cast<int64_t>(significand_double));
+ exponent -= std::numeric_limits<double>::digits;
+
+ const auto five_multiplier = power_of_five(decimal.scale);
+ if (decimal.value % five_multiplier != 0) {
+ return false;
+ }
+ const auto binary_scaled_decimal = decimal.value / five_multiplier;
+ return scaled_binary_equal(binary_scaled_decimal, exponent + decimal.scale, significand);
+}
+
+inline bool numeric_equal(const JsonbValue* lhs, const JsonbValue* rhs) {
+ if (!is_numeric(rhs)) {
+ return false;
+ }
+
+ if ((lhs->isDouble() || lhs->isFloat()) && rhs->isInt()) {
+ return floating_equal_integer(lhs, rhs->int_val());
+ }
+
+ if ((rhs->isDouble() || rhs->isFloat()) && lhs->isInt()) {
+ return floating_equal_integer(rhs, lhs->int_val());
+ }
+
+ if ((lhs->isDouble() || lhs->isFloat()) && rhs->isDecimal()) {
+ return floating_equal_decimal(lhs, get_scaled_decimal(rhs));
+ }
+
+ if ((rhs->isDouble() || rhs->isFloat()) && lhs->isDecimal()) {
+ return floating_equal_decimal(rhs, get_scaled_decimal(lhs));
+ }
+
+ if (lhs->isDouble() || lhs->isFloat()) {
+ return (rhs->isDouble() || rhs->isFloat()) && floating_value(lhs) == floating_value(rhs);
+ }
+
+ if (lhs->isDecimal()) {
+ const auto lhs_decimal = get_scaled_decimal(lhs);
+ if (rhs->isDecimal()) {
+ return scaled_decimal_equal_decimal(lhs_decimal, get_scaled_decimal(rhs));
+ }
+ return scaled_decimal_equal_integer(lhs_decimal, rhs->int_val());
+ }
+
+ if (rhs->isDecimal()) {
+ return scaled_decimal_equal_integer(get_scaled_decimal(rhs), lhs->int_val());
+ }
+
+ return lhs->int_val() == rhs->int_val();
+}
+
inline bool array_contains_value(const ArrayVal* target_array, const JsonbValue* candidate) {
const int target_num = target_array->numElem();
for (int i = 0; i < target_num; ++i) {
@@ -1155,18 +1348,14 @@ inline bool JsonbValue::contains(const JsonbValue* rhs) const {
case JsonbType::T_Int16:
case JsonbType::T_Int32:
case JsonbType::T_Int64:
- case JsonbType::T_Int128: {
- return rhs->isInt() && this->int_val() == rhs->int_val();
- }
+ case JsonbType::T_Int128:
case JsonbType::T_Double:
- case JsonbType::T_Float: {
- if (!rhs->isDouble() && !rhs->isFloat()) {
- return false;
- }
- double left = isDouble() ? unpack<JsonbDoubleVal>()->val() : unpack<JsonbFloatVal>()->val();
- double right = rhs->isDouble() ? rhs->unpack<JsonbDoubleVal>()->val()
- : rhs->unpack<JsonbFloatVal>()->val();
- return left == right;
+ case JsonbType::T_Float:
+ case JsonbType::T_Decimal32:
+ case JsonbType::T_Decimal64:
+ case JsonbType::T_Decimal128:
+ case JsonbType::T_Decimal256: {
+ return jsonb_detail::numeric_equal(this, rhs);
}
case JsonbType::T_String:
case JsonbType::T_Binary: {
@@ -1209,42 +1398,6 @@ inline bool JsonbValue::contains(const JsonbValue* rhs) const {
case JsonbType::T_False: {
return rhs->isFalse();
}
- case JsonbType::T_Decimal32: {
- if (rhs->isDecimal32()) {
- return unpack<JsonbDecimal32>()->val() == rhs->unpack<JsonbDecimal32>()->val() &&
- unpack<JsonbDecimal32>()->precision ==
- rhs->unpack<JsonbDecimal32>()->precision &&
- unpack<JsonbDecimal32>()->scale == rhs->unpack<JsonbDecimal32>()->scale;
- }
- return false;
- }
- case JsonbType::T_Decimal64: {
- if (rhs->isDecimal64()) {
- return unpack<JsonbDecimal64>()->val() == rhs->unpack<JsonbDecimal64>()->val() &&
- unpack<JsonbDecimal64>()->precision ==
- rhs->unpack<JsonbDecimal64>()->precision &&
- unpack<JsonbDecimal64>()->scale == rhs->unpack<JsonbDecimal64>()->scale;
- }
- return false;
- }
- case JsonbType::T_Decimal128: {
- if (rhs->isDecimal128()) {
- return unpack<JsonbDecimal128>()->val() == rhs->unpack<JsonbDecimal128>()->val() &&
- unpack<JsonbDecimal128>()->precision ==
- rhs->unpack<JsonbDecimal128>()->precision &&
- unpack<JsonbDecimal128>()->scale == rhs->unpack<JsonbDecimal128>()->scale;
- }
- return false;
- }
- case JsonbType::T_Decimal256: {
- if (rhs->isDecimal256()) {
- return unpack<JsonbDecimal256>()->val() == rhs->unpack<JsonbDecimal256>()->val() &&
- unpack<JsonbDecimal256>()->precision ==
- rhs->unpack<JsonbDecimal256>()->precision &&
- unpack<JsonbDecimal256>()->scale == rhs->unpack<JsonbDecimal256>()->scale;
- }
- return false;
- }
case JsonbType::NUM_TYPES:
break;
}
diff --git a/be/test/core/jsonb/jsonb_document_test.cpp b/be/test/core/jsonb/jsonb_document_test.cpp
index c189afb025a..a26f1f87e72 100644
--- a/be/test/core/jsonb/jsonb_document_test.cpp
+++ b/be/test/core/jsonb/jsonb_document_test.cpp
@@ -19,6 +19,7 @@
#include <gtest/gtest.h>
+#include <cmath>
#include <cstdint>
#include <limits>
#include <string>
@@ -37,6 +38,10 @@ protected:
void TearDown() override {}
};
+static const JsonbValue* jsonb_value(JsonbWriter& writer) {
+ return writer.getDocument()->getValue();
+}
+
TEST_F(JsonbDocumentTest, writer) {
JsonbWriter writer;
writer.writeStartObject();
@@ -295,4 +300,71 @@ TEST_F(JsonbDocumentTest, invaild_jsonb_document) {
EXPECT_EQ(json_null, json_string);
}
+TEST_F(JsonbDocumentTest, contains_numeric_equality) {
+ JsonbWriter int_writer;
+ ASSERT_TRUE(int_writer.writeInt(1));
+
+ JsonbWriter double_writer;
+ ASSERT_TRUE(double_writer.writeDouble(1.0));
+
+ JsonbWriter non_integer_double_writer;
+ ASSERT_TRUE(non_integer_double_writer.writeDouble(1.5));
+
+ JsonbWriter decimal32_writer;
+ ASSERT_TRUE(decimal32_writer.writeDecimal(Decimal32(int32_t(100)), 3, 2));
+
+ JsonbWriter decimal32_scale1_writer;
+ ASSERT_TRUE(decimal32_scale1_writer.writeDecimal(Decimal32(int32_t(10)), 2, 1));
+
+ JsonbWriter decimal64_writer;
+ ASSERT_TRUE(decimal64_writer.writeDecimal(Decimal64(int64_t(1000)), 4, 3));
+
+ JsonbWriter decimal128_non_integer_writer;
+ ASSERT_TRUE(decimal128_non_integer_writer.writeDecimal(Decimal128V3(int128_t(150)), 3, 2));
+
+ JsonbWriter large_int128_writer;
+ ASSERT_TRUE(large_int128_writer.writeInt128((int128_t(1) << 100) + 1));
+
+ JsonbWriter large_decimal128_writer;
+ ASSERT_TRUE(
+ large_decimal128_writer.writeDecimal(Decimal128V3((int128_t(1) << 100) + 1), 31, 0));
+
+ JsonbWriter large_double_writer;
+ ASSERT_TRUE(large_double_writer.writeDouble(std::ldexp(1.0, 100)));
+
+ EXPECT_TRUE(jsonb_value(int_writer)->contains(jsonb_value(double_writer)));
+ EXPECT_TRUE(jsonb_value(double_writer)->contains(jsonb_value(int_writer)));
+ EXPECT_FALSE(jsonb_value(int_writer)->contains(jsonb_value(non_integer_double_writer)));
+ EXPECT_FALSE(jsonb_value(large_int128_writer)->contains(jsonb_value(large_double_writer)));
+ EXPECT_FALSE(jsonb_value(large_decimal128_writer)->contains(jsonb_value(large_double_writer)));
+ EXPECT_FALSE(jsonb_value(large_double_writer)->contains(jsonb_value(large_decimal128_writer)));
+
+ EXPECT_TRUE(jsonb_value(decimal32_writer)->contains(jsonb_value(decimal32_scale1_writer)));
+ EXPECT_TRUE(jsonb_value(decimal32_scale1_writer)->contains(jsonb_value(decimal32_writer)));
+ EXPECT_TRUE(jsonb_value(decimal32_writer)->contains(jsonb_value(decimal64_writer)));
+ EXPECT_TRUE(jsonb_value(decimal64_writer)->contains(jsonb_value(decimal32_writer)));
+ EXPECT_TRUE(jsonb_value(decimal32_writer)->contains(jsonb_value(int_writer)));
+ EXPECT_TRUE(jsonb_value(int_writer)->contains(jsonb_value(decimal32_writer)));
+ EXPECT_TRUE(jsonb_value(decimal32_writer)->contains(jsonb_value(double_writer)));
+ EXPECT_TRUE(jsonb_value(double_writer)->contains(jsonb_value(decimal32_writer)));
+ EXPECT_FALSE(jsonb_value(decimal128_non_integer_writer)->contains(jsonb_value(int_writer)));
+ EXPECT_FALSE(jsonb_value(int_writer)->contains(jsonb_value(decimal128_non_integer_writer)));
+ EXPECT_FALSE(jsonb_value(decimal128_non_integer_writer)->contains(jsonb_value(double_writer)));
+ EXPECT_FALSE(jsonb_value(double_writer)->contains(jsonb_value(decimal128_non_integer_writer)));
+}
+
+TEST_F(JsonbDocumentTest, contains_invalid_decimal_scale) {
+ JsonbWriter invalid_decimal_writer;
+ ASSERT_TRUE(invalid_decimal_writer.writeDecimal(
+ Decimal32(int32_t(100)), 2, static_cast<uint32_t>(BeConsts::MAX_DECIMALV3_SCALE) + 1));
+
+ JsonbWriter double_writer;
+ ASSERT_TRUE(double_writer.writeDouble(1.0));
+
+ EXPECT_THROW(jsonb_value(invalid_decimal_writer)->contains(jsonb_value(double_writer)),
+ Exception);
+ EXPECT_THROW(jsonb_value(double_writer)->contains(jsonb_value(invalid_decimal_writer)),
+ Exception);
+}
+
} // namespace doris
diff --git a/regression-test/data/external_table_p0/dialect_compatible/sql/presto/scalar/TestJsonFunctions.out b/regression-test/data/external_table_p0/dialect_compatible/sql/presto/scalar/TestJsonFunctions.out
index b34789e4d73..ec516837c55 100644
--- a/regression-test/data/external_table_p0/dialect_compatible/sql/presto/scalar/TestJsonFunctions.out
+++ b/regression-test/data/external_table_p0/dialect_compatible/sql/presto/scalar/TestJsonFunctions.out
@@ -54,7 +54,7 @@ true
true
-- !TestJsonFunctions_19 --
-false
+true
-- !TestJsonFunctions_20 --
\N
@@ -72,7 +72,7 @@ true
true
-- !TestJsonFunctions_25 --
-false
+true
-- !TestJsonFunctions_26 --
\N
@@ -93,7 +93,7 @@ true
true
-- !TestJsonFunctions_32 --
-false
+true
-- !TestJsonFunctions_33 --
\N
@@ -114,7 +114,7 @@ true
true
-- !TestJsonFunctions_39 --
-false
+true
-- !TestJsonFunctions_40 --
\N
@@ -381,7 +381,7 @@ true
true
-- !TestJsonFunctions_128 --
-false
+true
-- !TestJsonFunctions_129 --
\N
@@ -399,7 +399,7 @@ true
true
-- !TestJsonFunctions_134 --
-false
+true
-- !TestJsonFunctions_135 --
\N
@@ -420,7 +420,7 @@ true
true
-- !TestJsonFunctions_141 --
-false
+true
-- !TestJsonFunctions_142 --
\N
@@ -438,7 +438,7 @@ true
true
-- !TestJsonFunctions_147 --
-false
+true
-- !TestJsonFunctions_148 --
\N
diff --git a/regression-test/data/query_p0/sql_functions/json_functions/test_json_contains_numeric_equality.out b/regression-test/data/query_p0/sql_functions/json_functions/test_json_contains_numeric_equality.out
new file mode 100644
index 00000000000..0b2cf72b233
--- /dev/null
+++ b/regression-test/data/query_p0/sql_functions/json_functions/test_json_contains_numeric_equality.out
@@ -0,0 +1,3 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !json_contains_numeric_equality --
+true true true true true true false
diff --git a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_contains_numeric_equality.groovy b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_contains_numeric_equality.groovy
new file mode 100644
index 00000000000..4e38b9601c8
--- /dev/null
+++ b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_contains_numeric_equality.groovy
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_json_contains_numeric_equality", "p0") {
+ order_qt_json_contains_numeric_equality """
+ SELECT JSON_CONTAINS('1', '1.0'),
+ JSON_CONTAINS('1.0', '1'),
+ JSON_CONTAINS('[1]', '1.0'),
+ JSON_CONTAINS('[1.0]', '1'),
+ JSON_CONTAINS('{"qty": 1}', '{"qty": 1.0}'),
+ JSON_CONTAINS('{"qty": 1.0}', '{"qty": 1}'),
+ JSON_CONTAINS('{"qty": 1}', '{"qty": 1.5}')
+ ORDER BY 1;
+ """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]