[doris] branch branch-2.0 updated: [FIX](decimalv3)Fix decimalv3 with precision #24241 (#24326)

kxiao Wed, 13 Sep 2023 07:20:38 -0700

This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 3d4497b9a4 [FIX](decimalv3)Fix decimalv3 with precision #24241 (#24326)
3d4497b9a4 is described below

commit 3d4497b9a4a282f70cde74aec1c591c424a7fec8
Author: amory <[email protected]>
AuthorDate: Wed Sep 13 22:19:55 2023 +0800

    [FIX](decimalv3)Fix decimalv3 with precision #24241 (#24326)
---
 be/src/util/string_parser.hpp                      | 14 ++++---
 be/src/vec/data_types/data_type_decimal.cpp        | 18 ++++----
 .../data_types/serde/data_type_decimal_serde.cpp   | 18 ++++----
 be/src/vec/functions/function_cast.h               | 48 ++++++++++++++++------
 be/src/vec/io/io_helper.h                          | 10 +++--
 .../suites/query_p0/cast/test_cast.groovy          | 31 ++++++++++++++
 6 files changed, 101 insertions(+), 38 deletions(-)

diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp
index a59d6bf7de..e96ac4e3c2 100644
--- a/be/src/util/string_parser.hpp
+++ b/be/src/util/string_parser.hpp
@@ -722,13 +722,10 @@ T StringParser::string_to_decimal(const char* s, int len, 
int type_precision, in
                     return 0;
                 }
                 *result = StringParser::PARSE_SUCCESS;
-                if constexpr (std::is_same_v<T, vectorized::Int128I>) {
-                    value *= get_scale_multiplier<__int128>(type_scale - 
scale);
-                } else {
+
+                if (type_scale > scale) {
                     value *= get_scale_multiplier<T>(type_scale - scale);
                 }
-
-                return is_negative ? T(-value) : T(value);
             }
         }
     }
@@ -762,6 +759,13 @@ T StringParser::string_to_decimal(const char* s, int len, 
int type_precision, in
     *result = StringParser::PARSE_SUCCESS;
     if (UNLIKELY(precision - scale > type_precision - type_scale)) {
         *result = StringParser::PARSE_OVERFLOW;
+        if constexpr (TYPE_DECIMALV2 != P) {
+            // decimalv3 overflow will return max min value for type precision
+            value = is_negative
+                            ? 
vectorized::min_decimal_value<vectorized::Decimal<T>>(type_precision)
+                            : 
vectorized::max_decimal_value<vectorized::Decimal<T>>(type_precision);
+            return value;
+        }
     } else if (UNLIKELY(scale > type_scale)) {
         *result = StringParser::PARSE_UNDERFLOW;
         int shift = scale - type_scale;
diff --git a/be/src/vec/data_types/data_type_decimal.cpp 
b/be/src/vec/data_types/data_type_decimal.cpp
index deb559f75b..5aea3bea2d 100644
--- a/be/src/vec/data_types/data_type_decimal.cpp
+++ b/be/src/vec/data_types/data_type_decimal.cpp
@@ -87,15 +87,17 @@ void DataTypeDecimal<T>::to_string(const IColumn& column, 
size_t row_num,
 template <typename T>
 Status DataTypeDecimal<T>::from_string(ReadBuffer& rb, IColumn* column) const {
     auto& column_data = static_cast<ColumnType&>(*column).get_data();
-    T val = 0;
-    if (!read_decimal_text_impl<DataTypeDecimalSerDe<T>::get_primitive_type(), 
T>(
-                val, rb, precision, scale)) {
-        return Status::InvalidArgument("parse decimal fail, string: '{}', 
primitive type: '{}'",
-                                       std::string(rb.position(), 
rb.count()).c_str(),
-                                       
DataTypeDecimalSerDe<T>::get_primitive_type());
+    T val {};
+    StringParser::ParseResult res =
+            
read_decimal_text_impl<DataTypeDecimalSerDe<T>::get_primitive_type(), T>(
+                    val, rb, precision, scale);
+    if (res == StringParser::PARSE_SUCCESS || res == 
StringParser::PARSE_UNDERFLOW) {
+        column_data.emplace_back(val);
+        return Status::OK();
     }
-    column_data.emplace_back(val);
-    return Status::OK();
+    return Status::InvalidArgument("parse decimal fail, string: '{}', 
primitive type: '{}'",
+                                   std::string(rb.position(), 
rb.count()).c_str(),
+                                   
DataTypeDecimalSerDe<T>::get_primitive_type());
 }
 
 // binary: row_num | value1 | value2 | ...
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp 
b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
index fdf9d41128..9ffe461f68 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
@@ -70,15 +70,17 @@ template <typename T>
 Status DataTypeDecimalSerDe<T>::deserialize_one_cell_from_json(IColumn& 
column, Slice& slice,
                                                                const 
FormatOptions& options) const {
     auto& column_data = assert_cast<ColumnDecimal<T>&>(column).get_data();
-    T val = 0;
-    if (ReadBuffer rb(slice.data, slice.size);
-        !read_decimal_text_impl<get_primitive_type(), T>(val, rb, precision, 
scale)) {
-        return Status::InvalidArgument("parse decimal fail, string: '{}', 
primitive type: '{}'",
-                                       std::string(rb.position(), 
rb.count()).c_str(),
-                                       get_primitive_type());
+    T val = {};
+    ReadBuffer rb(slice.data, slice.size);
+    StringParser::ParseResult res =
+            read_decimal_text_impl<get_primitive_type(), T>(val, rb, 
precision, scale);
+    if (res == StringParser::PARSE_SUCCESS || res == 
StringParser::PARSE_UNDERFLOW) {
+        column_data.emplace_back(val);
+        return Status::OK();
     }
-    column_data.emplace_back(val);
-    return Status::OK();
+    return Status::InvalidArgument("parse decimal fail, string: '{}', 
primitive type: '{}'",
+                                   std::string(rb.position(), 
rb.count()).c_str(),
+                                   get_primitive_type());
 }
 
 template <typename T>
diff --git a/be/src/vec/functions/function_cast.h 
b/be/src/vec/functions/function_cast.h
index 6a6a348e15..5d4a1716b1 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -117,6 +117,11 @@ inline UInt32 extract_to_decimal_scale(const 
ColumnWithTypeAndName& named_column
     named_column.column->get(0, field);
     return field.get<UInt32>();
 }
+
+struct PrecisionScaleArg {
+    UInt32 precision;
+    UInt32 scale;
+};
 /** Cast from string or number to Time.
   * In Doris, the underlying storage type of the Time class is Float64.
   */
@@ -270,7 +275,7 @@ struct ConvertImpl {
                     check_and_get_column<ColVecFrom>(named_from.column.get())) 
{
             typename ColVecTo::MutablePtr col_to = nullptr;
             if constexpr (IsDataTypeDecimal<ToDataType>) {
-                UInt32 scale = additions;
+                UInt32 scale = ((PrecisionScaleArg)additions).scale;
                 ToDataType::check_type_scale(scale);
                 col_to = ColVecTo::create(0, scale);
             } else {
@@ -897,25 +902,37 @@ bool try_parse_impl(typename DataType::FieldType& x, 
ReadBuffer& rb,
     if constexpr (std::is_integral_v<typename DataType::FieldType>) {
         return try_read_int_text(x, rb);
     }
+}
 
+template <typename DataType, typename Additions = void*>
+StringParser::ParseResult try_parse_decimal_impl(typename DataType::FieldType& 
x, ReadBuffer& rb,
+                                                 const cctz::time_zone& 
local_time_zone,
+                                                 ZoneList& time_zone_cache,
+                                                 std::shared_mutex& cache_lock,
+                                                 Additions additions
+                                                 [[maybe_unused]] = 
Additions()) {
     if constexpr (IsDataTypeDecimalV2<DataType>) {
-        UInt32 scale = additions;
-        return try_read_decimal_text<TYPE_DECIMALV2>(x, rb, 
DataType::max_precision(), scale);
+        UInt32 scale = ((PrecisionScaleArg)additions).scale;
+        UInt32 precision = ((PrecisionScaleArg)additions).precision;
+        return try_read_decimal_text<TYPE_DECIMALV2>(x, rb, precision, scale);
     }
 
     if constexpr (std::is_same_v<DataTypeDecimal<Decimal32>, DataType>) {
-        UInt32 scale = additions;
-        return try_read_decimal_text<TYPE_DECIMAL32>(x, rb, 
DataType::max_precision(), scale);
+        UInt32 scale = ((PrecisionScaleArg)additions).scale;
+        UInt32 precision = ((PrecisionScaleArg)additions).precision;
+        return try_read_decimal_text<TYPE_DECIMAL32>(x, rb, precision, scale);
     }
 
     if constexpr (std::is_same_v<DataTypeDecimal<Decimal64>, DataType>) {
-        UInt32 scale = additions;
-        return try_read_decimal_text<TYPE_DECIMAL64>(x, rb, 
DataType::max_precision(), scale);
+        UInt32 scale = ((PrecisionScaleArg)additions).scale;
+        UInt32 precision = ((PrecisionScaleArg)additions).precision;
+        return try_read_decimal_text<TYPE_DECIMAL64>(x, rb, precision, scale);
     }
 
     if constexpr (IsDataTypeDecimal128I<DataType>) {
-        UInt32 scale = additions;
-        return try_read_decimal_text<TYPE_DECIMAL128I>(x, rb, 
DataType::max_precision(), scale);
+        UInt32 scale = ((PrecisionScaleArg)additions).scale;
+        UInt32 precision = ((PrecisionScaleArg)additions).precision;
+        return try_read_decimal_text<TYPE_DECIMAL128I>(x, rb, precision, 
scale);
     }
 }
 
@@ -1346,7 +1363,7 @@ struct ConvertThroughParsing {
         typename ColVecTo::MutablePtr col_to = nullptr;
 
         if constexpr (IsDataTypeDecimal<ToDataType>) {
-            UInt32 scale = additions;
+            UInt32 scale = ((PrecisionScaleArg)additions).scale;
             ToDataType::check_type_scale(scale);
             col_to = ColVecTo::create(size, scale);
         } else {
@@ -1382,9 +1399,13 @@ struct ConvertThroughParsing {
 
             bool parsed;
             if constexpr (IsDataTypeDecimal<ToDataType>) {
-                parsed = try_parse_impl<ToDataType>(
+                
ToDataType::check_type_precision((PrecisionScaleArg(additions).precision));
+                StringParser::ParseResult res = 
try_parse_decimal_impl<ToDataType>(
                         vec_to[i], read_buffer, 
context->state()->timezone_obj(), time_zone_cache,
-                        cache_lock, vec_to.get_scale());
+                        cache_lock, PrecisionScaleArg(additions));
+                parsed = (res == StringParser::PARSE_SUCCESS ||
+                          res == StringParser::PARSE_OVERFLOW ||
+                          res == StringParser::PARSE_UNDERFLOW);
             } else if constexpr (IsDataTypeDateTimeV2<ToDataType>) {
                 auto type = check_and_get_data_type<DataTypeDateTimeV2>(
                         block.get_by_position(result).type.get());
@@ -1625,7 +1646,8 @@ private:
 
                         auto state = ConvertImpl<LeftDataType, RightDataType, 
NameCast>::execute(
                                 context, block, arguments, result, 
input_rows_count,
-                                context->check_overflow_for_decimal(), scale);
+                                context->check_overflow_for_decimal(),
+                                PrecisionScaleArg {precision, scale});
                         if (!state) {
                             throw Exception(state.code(), state.to_string());
                         }
diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h
index c8bc6d7c7c..42918ca4c0 100644
--- a/be/src/vec/io/io_helper.h
+++ b/be/src/vec/io/io_helper.h
@@ -380,7 +380,8 @@ bool read_datetime_v2_text_impl(T& x, ReadBuffer& buf, 
const cctz::time_zone& lo
 }
 
 template <PrimitiveType P, typename T>
-bool read_decimal_text_impl(T& x, ReadBuffer& buf, UInt32 precision, UInt32 
scale) {
+StringParser::ParseResult read_decimal_text_impl(T& x, ReadBuffer& buf, UInt32 
precision,
+                                                 UInt32 scale) {
     static_assert(IsDecimalNumber<T>);
     if constexpr (!std::is_same_v<Decimal128, T>) {
         StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
@@ -389,7 +390,7 @@ bool read_decimal_text_impl(T& x, ReadBuffer& buf, UInt32 
precision, UInt32 scal
                 (const char*)buf.position(), buf.count(), precision, scale, 
&result);
         // only to match the is_all_read() check to prevent return null
         buf.position() = buf.end();
-        return result == StringParser::PARSE_SUCCESS || result == 
StringParser::PARSE_UNDERFLOW;
+        return result;
     } else {
         StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
 
@@ -400,7 +401,7 @@ bool read_decimal_text_impl(T& x, ReadBuffer& buf, UInt32 
precision, UInt32 scal
         // only to match the is_all_read() check to prevent return null
         buf.position() = buf.end();
 
-        return result == StringParser::PARSE_SUCCESS || result == 
StringParser::PARSE_UNDERFLOW;
+        return result;
     }
 }
 
@@ -450,7 +451,8 @@ bool try_read_float_text(T& x, ReadBuffer& in) {
 }
 
 template <PrimitiveType P, typename T>
-bool try_read_decimal_text(T& x, ReadBuffer& in, UInt32 precision, UInt32 
scale) {
+StringParser::ParseResult try_read_decimal_text(T& x, ReadBuffer& in, UInt32 
precision,
+                                                UInt32 scale) {
     return read_decimal_text_impl<P, T>(x, in, precision, scale);
 }
 
diff --git a/regression-test/suites/query_p0/cast/test_cast.groovy 
b/regression-test/suites/query_p0/cast/test_cast.groovy
index 59d86eb80e..1063845775 100644
--- a/regression-test/suites/query_p0/cast/test_cast.groovy
+++ b/regression-test/suites/query_p0/cast/test_cast.groovy
@@ -32,6 +32,37 @@ suite('test_cast') {
         result([[869930357, 20200101123445l, ((float) 20200101123445l), 
((double) 20200101123445l)]])
     }
 
+    test {
+        sql " select cast('9999e-1' as DECIMALV3(2, 1)) "
+        result([[9.9]])
+    }
+
+    test {
+        sql " select cast('100000' as DECIMALV3(2, 1)) "
+        result([[9.9]])
+    }
+
+    test {
+        sql " select cast('-9999e-1' as DECIMALV3(2, 1)) "
+        result([[-9.9]])
+    }
+
+
+    test {
+        sql " select cast('100000' as DECIMALV3(2, 1)) "
+        result([[9.9]])
+    }
+
+    test {
+        sql "select cast('0.2147483648e3' as DECIMALV3(2, 1))"
+        result([[9.9]])
+    }
+
+    test {
+        sql "select cast('0.2147483648e-3' as DECIMALV3(2, 1))"
+        result([[0.0]])
+    }
+
     def tbl = "test_cast"
 
     sql """ DROP TABLE IF EXISTS ${tbl}"""


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[doris] branch branch-2.0 updated: [FIX](decimalv3)Fix decimalv3 with precision #24241 (#24326)

Reply via email to