This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new f708be5c7d8 branch-4.1: [fix](decimal) Fix incorrect decimal cast results for scientific-notation strings #63119 (#63184)
f708be5c7d8 is described below
commit f708be5c7d848c27fcb4e2fe3131a2c30575e99d
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed May 13 11:45:19 2026 +0800
branch-4.1: [fix](decimal) Fix incorrect decimal cast results for scientific-notation strings #63119 (#63184)
Cherry-picked from #63119
Co-authored-by: TengJianPing <[email protected]>
---
be/src/util/string_parser.cpp | 49 +++++++++++++++++++---
.../column_array_update_crc32c_batch_37.out | 4 +-
...n_array_update_crc32c_batch_37.out_with_nullmap | 4 +-
.../column_array_update_crc32c_single_37.out | 2 +-
..._array_update_crc32c_single_37.out_with_nullmap | 2 +-
be/test/exprs/function/cast/cast_to_decimal.cpp | 33 ++++++++++++++-
6 files changed, 81 insertions(+), 13 deletions(-)
diff --git a/be/src/util/string_parser.cpp b/be/src/util/string_parser.cpp
index 5dcb65ae072..0b057467e12 100644
--- a/be/src/util/string_parser.cpp
+++ b/be/src/util/string_parser.cpp
@@ -41,6 +41,20 @@ namespace doris {
// <exponent> ::= <e_marker> <sign>? <digits>
//
// <e_marker> ::= "e" | "E"
+//
+// Parsing algorithm:
+// 1. Trim spaces and the sign, then normalize the significand by skipping leading zeros and an
+// optional leading dot. During this scan, count digits that belong to the original integral
+// part (`int_part_count`) and remember where the significand ends (`end_digit_index`).
+// 2. Parse the optional exponent. Scientific notation is handled by moving the decimal point:
+// `result_int_part_digit_count = int_part_count + exponent`. For example, "12.34e-1" has
+// int_part_count=2 and exponent=-1, so the result has one integral digit: "1.234".
+// 3. Build the result in scaled-integer form: first collect the integral digits up to the shifted
+// decimal point, then collect up to `type_scale` fractional digits, padding with zeros when the
+// input has fewer fractional digits than the target scale.
+// 4. If there are extra fractional digits, round half up using the first discarded digit. Finally,
+// check the integral digit count against `type_precision - type_scale` and return the signed
+// scaled integer value.
template <PrimitiveType P>
typename PrimitiveTypeTraits<P>::CppType::NativeType
StringParser::string_to_decimal(
const char* __restrict s, size_t len, int type_precision, int
type_scale,
@@ -50,6 +64,16 @@ typename PrimitiveTypeTraits<P>::CppType::NativeType
StringParser::string_to_dec
std::is_same_v<T, __int128> || std::is_same_v<T,
wide::Int256>,
"Cast string to decimal only support target type int32_t,
int64_t, __int128 or "
"wide::Int256.");
+
+ // Parse in two logical coordinate systems:
+ // 1. `s[0, end_digit_index)` is the normalized significand after trimming spaces, sign and
+ // leading zeros. If the original value starts with '.', the dot is also skipped so
+ // ".14E+3" is parsed as significand "14" with exponent 3.
+ // 2. `result_int_part_digit_count = int_part_count + exponent` is the decimal point position
+ // after applying scientific notation. For example, "1.4E+2" has int_part_count=1,
+ // exponent=2, result_int_part_digit_count=3, so "14" becomes integer 140.
+ // `digit_index` always indexes the normalized significand string, which may still contain a
+ // dot for inputs like "1.4E+2"; loops that build numbers skip that dot explicitly.
// Ignore leading and trailing spaces.
s = skip_ascii_whitespaces(s, len);
@@ -102,7 +126,9 @@ typename PrimitiveTypeTraits<P>::CppType::NativeType
StringParser::string_to_dec
*result = StringParser::PARSE_FAILURE;
return 0;
}
- // parse exponent if any
+ // Parse exponent if any. Keep `end_digit_index` before consuming 'e/E' so later digit counts
+ // ignore exponent syntax. For "1.4E+2", end_digit_index points just after "1.4", not after
+ // "E+2".
int64_t exponent = 0;
auto end_digit_index = i;
if (i != len) {
@@ -149,8 +175,6 @@ typename PrimitiveTypeTraits<P>::CppType::NativeType
StringParser::string_to_dec
return 0;
}
}
- T int_part_number = 0;
- T frac_part_number = 0;
// TODO: check limit values of exponent and add UT
// max string len is config::string_type_length_soft_limit_bytes,
// whose max value is std::numeric_limits<int32_t>::max() - 4,
@@ -163,9 +187,15 @@ typename PrimitiveTypeTraits<P>::CppType::NativeType
StringParser::string_to_dec
return 0;
}
int result_int_part_digit_count = tmp_result_int_part_digit_count;
+ T int_part_number = 0;
+ T frac_part_number = 0;
int actual_frac_part_count = 0;
int digit_index = 0;
if (result_int_part_digit_count >= 0) {
+ // `max_index` is the raw significand index where integer-part digits stop. Add one extra
+ // raw character only when crossing an in-buffer dot, e.g. "1.4E+2" must scan "1.4" to
+ // collect three integer digits after the exponent shift. It is capped by end_digit_index
+ // because missing digits are appended later by multiplying with powers of 10.
int max_index = std::min(found_dot ? (result_int_part_digit_count +
((int_part_count > 0 && exponent
> 0) ? 1 : 0))
: result_int_part_digit_count,
@@ -188,7 +218,11 @@ typename PrimitiveTypeTraits<P>::CppType::NativeType
StringParser::string_to_dec
}
int_part_number = int_part_number * 10 + (s[digit_index] - '0');
}
- auto total_significant_digit_count = i - ((found_dot && int_part_count
> 0) ? 1 : 0);
+ // Count only significand digits, not exponent syntax. If the exponent moves the decimal
+ // point past all available significant digits, append zeros by scaling the integer part:
+ // "1.4E+2" scans integer 14, total_significant_digit_count=2, then multiplies by 10.
+ auto total_significant_digit_count =
+ end_digit_index - ((found_dot && int_part_count > 0) ? 1 : 0);
if (result_int_part_digit_count > total_significant_digit_count) {
int_part_number *=
get_scale_multiplier<T>(result_int_part_digit_count -
total_significant_digit_count);
@@ -206,8 +240,11 @@ typename PrimitiveTypeTraits<P>::CppType::NativeType
StringParser::string_to_dec
++actual_frac_part_count;
}
auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
- // there are still extra fraction digits left, check rounding
- if (digit_index != end_digit_index) {
+ // Round only when the next parsed significand digit is exactly the first discarded fractional
+ // digit. If `actual_frac_part_count` is already greater than type_scale, the missing positions
+ // are implicit zeros from a negative exponent, so "5e-17" to scale 15 must stay 0 instead of
+ // rounding up.
+ if (actual_frac_part_count == type_scale && digit_index !=
end_digit_index) {
if (UNLIKELY(s[digit_index] == '.')) {
++digit_index;
}
diff --git
a/be/test/expected_result/vec/columns/column_array_update_crc32c_batch_37.out
b/be/test/expected_result/vec/columns/column_array_update_crc32c_batch_37.out
index d699fb6177a..c419670b58f 100644
---
a/be/test/expected_result/vec/columns/column_array_update_crc32c_batch_37.out
+++
b/be/test/expected_result/vec/columns/column_array_update_crc32c_batch_37.out
@@ -2,5 +2,5 @@
0
0
4230634956
-166888020
-1932016285
\ No newline at end of file
+572890395
+2601481115
\ No newline at end of file
diff --git
a/be/test/expected_result/vec/columns/column_array_update_crc32c_batch_37.out_with_nullmap
b/be/test/expected_result/vec/columns/column_array_update_crc32c_batch_37.out_with_nullmap
index d699fb6177a..c419670b58f 100644
---
a/be/test/expected_result/vec/columns/column_array_update_crc32c_batch_37.out_with_nullmap
+++
b/be/test/expected_result/vec/columns/column_array_update_crc32c_batch_37.out_with_nullmap
@@ -2,5 +2,5 @@
0
0
4230634956
-166888020
-1932016285
\ No newline at end of file
+572890395
+2601481115
\ No newline at end of file
diff --git
a/be/test/expected_result/vec/columns/column_array_update_crc32c_single_37.out
b/be/test/expected_result/vec/columns/column_array_update_crc32c_single_37.out
index 45a1f82c6f1..faaab6bc634 100644
---
a/be/test/expected_result/vec/columns/column_array_update_crc32c_single_37.out
+++
b/be/test/expected_result/vec/columns/column_array_update_crc32c_single_37.out
@@ -1 +1 @@
-106414486;4062799302;0
\ No newline at end of file
+880726687;3657333385;0
\ No newline at end of file
diff --git
a/be/test/expected_result/vec/columns/column_array_update_crc32c_single_37.out_with_nullmap
b/be/test/expected_result/vec/columns/column_array_update_crc32c_single_37.out_with_nullmap
index 45a1f82c6f1..faaab6bc634 100644
---
a/be/test/expected_result/vec/columns/column_array_update_crc32c_single_37.out_with_nullmap
+++
b/be/test/expected_result/vec/columns/column_array_update_crc32c_single_37.out_with_nullmap
@@ -1 +1 @@
-106414486;4062799302;0
\ No newline at end of file
+880726687;3657333385;0
\ No newline at end of file
diff --git a/be/test/exprs/function/cast/cast_to_decimal.cpp
b/be/test/exprs/function/cast/cast_to_decimal.cpp
index 677c7a3fa16..b8e1bef4825 100644
--- a/be/test/exprs/function/cast/cast_to_decimal.cpp
+++ b/be/test/exprs/function/cast/cast_to_decimal.cpp
@@ -88,6 +88,37 @@ TEST_F(FunctionCastToDecimalTest,
test_from_string_invalid_input) {
int table_index = 0;
from_string_invalid_input_test_func<Decimal32>(9, 3, table_index++);
}
+
+TEST_F(FunctionCastToDecimalTest, test_from_string_scientific_notation) {
+ InputTypeSet input_types = {PrimitiveType::TYPE_VARCHAR};
+ DataSet data_set = {
+ {{std::string("1.4E+2")}, DECIMAL128V3(140, 0, 15)},
+ {{std::string(".14E+3")}, DECIMAL128V3(140, 0, 15)},
+ {{std::string("0.001E+5")}, DECIMAL128V3(100, 0, 15)},
+ {{std::string("1.E+2")}, DECIMAL128V3(100, 0, 15)},
+ {{std::string("1.4E+0")}, DECIMAL128V3(1, 400000000000000, 15)},
+ {{std::string("1.4E-2")}, DECIMAL128V3(0, 14000000000000, 15)},
+ };
+ check_function_for_cast<DataTypeDecimal<Decimal128V3::PType>>(input_types,
data_set, 15, 38);
+}
+
+TEST_F(FunctionCastToDecimalTest, string_parser_scientific_rounding) {
+ auto parse_decimal128 = [](std::string_view value) {
+ StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
+ auto parsed =
StringParser::string_to_decimal<TYPE_DECIMAL128I>(value.data(), value.size(),
+ 38,
15, &result);
+ EXPECT_EQ(result, StringParser::PARSE_SUCCESS);
+ return parsed;
+ };
+
+ EXPECT_EQ(parse_decimal128("5e-16"), 1);
+ EXPECT_EQ(parse_decimal128("5e-17"), 0);
+ EXPECT_EQ(parse_decimal128("9e-17"), 0);
+ EXPECT_EQ(parse_decimal128("-5e-17"), 0);
+ EXPECT_EQ(parse_decimal128("0.0000000000000005"), 1);
+ EXPECT_EQ(parse_decimal128("0.00000000000000005"), 0);
+}
+
TEST_F(FunctionCastToDecimalTest, test_from_bool) {
from_bool_test_func<Decimal32>(9, 0);
from_bool_test_func<Decimal32>(9, 1);
@@ -122,4 +153,4 @@ TEST_F(FunctionCastToDecimalTest, test_from_bool_overflow) {
from_bool_overflow_test_func<Decimal128V3>();
from_bool_overflow_test_func<Decimal256>();
}
-} // namespace doris
\ No newline at end of file
+} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]