This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 677b46b0eff98126eccbb938b43a02a3aeda6b30 Author: amory <[email protected]> AuthorDate: Wed Jul 12 19:15:48 2023 +0800 [FIX](serde)Fix decimal for arrow serde (#21716) --- .../data_types/serde/data_type_decimal_serde.cpp | 26 ++++---- .../serde/data_type_serde_arrow_test.cpp | 78 +++++++++++++++++++++- 2 files changed, 88 insertions(+), 16 deletions(-) diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp index 4137dd885c..5e46c996e7 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp @@ -80,10 +80,8 @@ void DataTypeDecimalSerDe<T>::write_column_to_arrow(const IColumn& column, const array_builder->type()->name()); continue; } - const auto& data_ref = col.get_data_at(i); - const int32_t* p_value = reinterpret_cast<const int32_t*>(data_ref.data); - int64_t high = *p_value > 0 ? 0 : 1UL << 63; - arrow::Decimal128 value(high, *p_value > 0 ? *p_value : -*p_value); + Int128 p_value = Int128(col.get_element(i)); + arrow::Decimal128 value(reinterpret_cast<const uint8_t*>(&p_value)); checkArrowStatus(builder.Append(value), column.get_name(), array_builder->type()->name()); } @@ -96,10 +94,8 @@ void DataTypeDecimalSerDe<T>::write_column_to_arrow(const IColumn& column, const array_builder->type()->name()); continue; } - const auto& data_ref = col.get_data_at(i); - const int64_t* p_value = reinterpret_cast<const int64_t*>(data_ref.data); - int64_t high = *p_value > 0 ? 0 : 1UL << 63; - arrow::Decimal128 value(high, *p_value > 0 ? *p_value : -*p_value); + Int128 p_value = Int128(col.get_element(i)); + arrow::Decimal128 value(reinterpret_cast<const uint8_t*>(&p_value)); checkArrowStatus(builder.Append(value), column.get_name(), array_builder->type()->name()); } @@ -112,13 +108,13 @@ template <typename T> void DataTypeDecimalSerDe<T>::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const { + auto concrete_array = down_cast<const arrow::DecimalArray*>(arrow_array); + const auto* arrow_decimal_type = + static_cast<const arrow::DecimalType*>(arrow_array->type().get()); + const auto arrow_scale = arrow_decimal_type->scale(); + auto& column_data = static_cast<ColumnDecimal<T>&>(column).get_data(); if constexpr (std::is_same_v<T, Decimal<Int128>>) { - auto& column_data = static_cast<ColumnDecimal<vectorized::Decimal128>&>(column).get_data(); - auto concrete_array = down_cast<const arrow::DecimalArray*>(arrow_array); - const auto* arrow_decimal_type = - static_cast<const arrow::DecimalType*>(arrow_array->type().get()); // TODO check precision - const auto arrow_scale = arrow_decimal_type->scale(); for (size_t value_i = start; value_i < end; ++value_i) { auto value = *reinterpret_cast<const vectorized::Decimal128*>( concrete_array->Value(value_i)); @@ -140,6 +136,10 @@ void DataTypeDecimalSerDe<T>::read_column_from_arrow(IColumn& column, } column_data.emplace_back(value); } + } else if constexpr (std::is_same_v<T, Decimal64> || std::is_same_v<T, Decimal32>) { + for (size_t value_i = start; value_i < end; ++value_i) { + column_data.emplace_back(*reinterpret_cast<const T*>(concrete_array->Value(value_i))); + } } else { LOG(FATAL) << "Not support read " << column.get_name() << " from arrow"; } diff --git a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp index a51cb9bc90..7792d40839 100644 --- a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp @@ -51,6 +51,7 @@ #include "util/arrow/row_batch.h" #include "util/bitmap_value.h" #include "util/quantile_state.h" +#include "util/string_parser.hpp" #include "vec/columns/column.h" #include "vec/columns/column_array.h" #include "vec/columns/column_complex.h" @@ -77,6 +78,7 @@ #include "vec/data_types/data_type_time_v2.h" #include "vec/runtime/vdatetime_value.h" #include "vec/utils/arrow_column_to_doris_column.h" + namespace doris::vectorized { template <bool is_scalar> @@ -84,12 +86,16 @@ void serialize_and_deserialize_arrow_test() { vectorized::Block block; std::vector<std::tuple<std::string, FieldType, int, PrimitiveType, bool>> cols; if constexpr (is_scalar) { - cols = {{"k1", FieldType::OLAP_FIELD_TYPE_INT, 1, TYPE_INT, false}, + cols = { + {"k1", FieldType::OLAP_FIELD_TYPE_INT, 1, TYPE_INT, false}, {"k7", FieldType::OLAP_FIELD_TYPE_INT, 7, TYPE_INT, true}, {"k2", FieldType::OLAP_FIELD_TYPE_STRING, 2, TYPE_STRING, false}, {"k3", FieldType::OLAP_FIELD_TYPE_DECIMAL128I, 3, TYPE_DECIMAL128I, false}, {"k11", FieldType::OLAP_FIELD_TYPE_DATETIME, 11, TYPE_DATETIME, false}, - {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN, false}}; + {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN, false}, + {"k5", FieldType::OLAP_FIELD_TYPE_DECIMAL32, 5, TYPE_DECIMAL32, false}, + {"k6", FieldType::OLAP_FIELD_TYPE_DECIMAL64, 6, TYPE_DECIMAL64, false}, + }; } else { cols = {{"a", FieldType::OLAP_FIELD_TYPE_ARRAY, 6, TYPE_ARRAY, true}, {"m", FieldType::OLAP_FIELD_TYPE_MAP, 8, TYPE_MAP, true}, @@ -153,6 +159,72 @@ void serialize_and_deserialize_arrow_test() { block.insert(std::move(type_and_name)); } break; + case TYPE_DECIMAL32: + type_desc.precision = 9; + type_desc.scale = 2; + tslot.__set_slotType(type_desc.to_thrift()); + { + vectorized::DataTypePtr decimal_data_type = + std::make_shared<DataTypeDecimal<Decimal32>>(type_desc.precision, + type_desc.scale); + auto decimal_column = decimal_data_type->create_column(); + auto& data = ((vectorized::ColumnDecimal<vectorized::Decimal<vectorized::Int32>>*) + decimal_column.get()) + ->get_data(); + for (int i = 0; i < row_num; ++i) { + if (i == 0) { + data.push_back(Int32(0)); + continue; + } + Int32 val; + StringParser::ParseResult result = StringParser::PARSE_SUCCESS; + i % 2 == 0 ? val = StringParser::string_to_decimal<__int128>( + "1234567.56", 11, type_desc.precision, type_desc.scale, + &result) + : val = StringParser::string_to_decimal<__int128>( + "-1234567.56", 12, type_desc.precision, type_desc.scale, + &result); + EXPECT_TRUE(result == StringParser::PARSE_SUCCESS); + data.push_back(val); + } + + vectorized::ColumnWithTypeAndName type_and_name(decimal_column->get_ptr(), + decimal_data_type, col_name); + block.insert(type_and_name); + } + break; + case TYPE_DECIMAL64: + type_desc.precision = 18; + type_desc.scale = 6; + tslot.__set_slotType(type_desc.to_thrift()); + { + vectorized::DataTypePtr decimal_data_type = + std::make_shared<DataTypeDecimal<Decimal64>>(type_desc.precision, + type_desc.scale); + auto decimal_column = decimal_data_type->create_column(); + auto& data = ((vectorized::ColumnDecimal<vectorized::Decimal<vectorized::Int64>>*) + decimal_column.get()) + ->get_data(); + for (int i = 0; i < row_num; ++i) { + if (i == 0) { + data.push_back(Int64(0)); + continue; + } + Int64 val; + StringParser::ParseResult result = StringParser::PARSE_SUCCESS; + std::string decimal_string = + i % 2 == 0 ? "-123456789012.123456" : "123456789012.123456"; + val = StringParser::string_to_decimal<__int128>( + decimal_string.c_str(), decimal_string.size(), type_desc.precision, + type_desc.scale, &result); + EXPECT_TRUE(result == StringParser::PARSE_SUCCESS); + data.push_back(val); + } + vectorized::ColumnWithTypeAndName type_and_name(decimal_column->get_ptr(), + decimal_data_type, col_name); + block.insert(type_and_name); + } + break; case TYPE_DECIMAL128I: type_desc.precision = 27; type_desc.scale = 9; @@ -362,7 +434,7 @@ void serialize_and_deserialize_arrow_test() { // serialize std::shared_ptr<arrow::RecordBatch> result; - std::cout << "block structure: " << block.dump_structure() << std::endl; + std::cout << "block data: " << block.dump_data(0, row_num) << std::endl; std::cout << "_arrow_schema: " << _arrow_schema->ToString(true) << std::endl; convert_to_arrow_batch(block, _arrow_schema, arrow::default_memory_pool(), &result); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
