This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new ce397a8d32 [FIX](map)fix arrow serde with map null key #21955
ce397a8d32 is described below
commit ce397a8d32641fe34e28968a1c560940a548f3a0
Author: amory <[email protected]>
AuthorDate: Wed Jul 19 12:09:34 2023 +0800
[FIX](map)fix arrow serde with map null key #21955
---
.../vec/data_types/serde/data_type_map_serde.cpp | 19 ++++++++++--
.../serde/data_type_serde_arrow_test.cpp | 36 ++++++++++++++++++++--
2 files changed, 49 insertions(+), 6 deletions(-)
diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp
b/be/src/vec/data_types/serde/data_type_map_serde.cpp
index edb21a60ef..fcf67a8f53 100644
--- a/be/src/vec/data_types/serde/data_type_map_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp
@@ -67,9 +67,22 @@ void DataTypeMapSerDe::write_column_to_arrow(const IColumn&
column, const NullMa
array_builder->type()->name());
} else if (simd::contain_byte(keys_nullmap_data + offsets[r - 1],
offsets[r] - offsets[r - 1], 1)) {
- // arrow do not support key is null so we just put null with this
row
- checkArrowStatus(builder.AppendNull(), column.get_name(),
- array_builder->type()->name());
+ // Arrow does not support null map keys, so skip the key-value pairs whose key is null
+ MutableColumnPtr key_mutable_data =
nested_keys_column.clone_empty();
+ MutableColumnPtr value_mutable_data =
nested_values_column.clone_empty();
+ for (size_t i = offsets[r - 1]; i < offsets[r]; ++i) {
+ if (keys_nullmap_data[i] == 1) {
+ continue;
+ }
+ key_mutable_data->insert_from(nested_keys_column, i);
+ value_mutable_data->insert_from(nested_values_column, i);
+ }
+ checkArrowStatus(builder.Append(), column.get_name(),
array_builder->type()->name());
+
+ key_serde->write_column_to_arrow(*key_mutable_data, nullptr,
key_builder, 0,
+ key_mutable_data->size());
+ value_serde->write_column_to_arrow(*value_mutable_data, nullptr,
value_builder, 0,
+ value_mutable_data->size());
} else {
checkArrowStatus(builder.Append(), column.get_name(),
array_builder->type()->name());
key_serde->write_column_to_arrow(nested_keys_column, nullptr,
key_builder,
diff --git a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
index c1913e6d86..92fbcc97c0 100644
--- a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
@@ -56,6 +56,7 @@
#include "vec/columns/column_array.h"
#include "vec/columns/column_complex.h"
#include "vec/columns/column_decimal.h"
+#include "vec/columns/column_map.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/columns/column_vector.h"
@@ -76,6 +77,7 @@
#include "vec/data_types/data_type_string.h"
#include "vec/data_types/data_type_struct.h"
#include "vec/data_types/data_type_time_v2.h"
+#include "vec/io/io_helper.h"
#include "vec/runtime/vdatetime_value.h"
#include "vec/utils/arrow_column_to_doris_column.h"
@@ -95,6 +97,7 @@ void serialize_and_deserialize_arrow_test() {
{"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN,
false},
{"k5", FieldType::OLAP_FIELD_TYPE_DECIMAL32, 5,
TYPE_DECIMAL32, false},
{"k6", FieldType::OLAP_FIELD_TYPE_DECIMAL64, 6,
TYPE_DECIMAL64, false},
+ {"k12", FieldType::OLAP_FIELD_TYPE_DATETIMEV2, 12,
TYPE_DATETIMEV2, false},
};
} else {
cols = {{"a", FieldType::OLAP_FIELD_TYPE_ARRAY, 6, TYPE_ARRAY, true},
@@ -327,6 +330,28 @@ void serialize_and_deserialize_arrow_test() {
block.insert(test_datetime);
}
break;
+ case TYPE_DATETIMEV2: // uint64
+ tslot.__set_slotType(type_desc.to_thrift());
+ {
+ // 2022-01-01 11:11:11.111
+ auto column_vector_datetimev2 =
+ vectorized::ColumnVector<vectorized::UInt64>::create();
+ // auto& datetimev2_data =
column_vector_datetimev2->get_data();
+ DateV2Value<DateTimeV2ValueType> value;
+ string date_literal = "2022-01-01 11:11:11.111";
+ value.from_date_str(date_literal.c_str(), date_literal.size());
+ char to[64] = {};
+ std::cout << "value: " << value.to_string(to) << std::endl;
+ for (int i = 0; i < row_num; ++i) {
+ column_vector_datetimev2->insert(value.to_date_int_val());
+ }
+ vectorized::DataTypePtr datetimev2_type(
+ std::make_shared<vectorized::DataTypeDateTimeV2>());
+ vectorized::ColumnWithTypeAndName test_datetimev2(
+ column_vector_datetimev2->get_ptr(), datetimev2_type,
col_name);
+ block.insert(test_datetimev2);
+ }
+ break;
case TYPE_ARRAY: // array
type_desc.add_sub_type(TYPE_STRING, true);
tslot.__set_slotType(type_desc.to_thrift());
@@ -487,6 +512,11 @@ void serialize_and_deserialize_arrow_test() {
}
}
continue;
+ } else if (std::get<3>(t) == PrimitiveType::TYPE_DATETIMEV2) {
+ // for now only the doris datetimev2 -> arrow direction is supported, so drop this column from both blocks
+ block.erase(real_column_name);
+ new_block.erase(real_column_name);
+ continue;
}
arrow_column_to_doris_column(array, 0,
column_with_type_and_name.column,
column_with_type_and_name.type,
block.rows(), "UTC");
@@ -579,9 +609,9 @@ TEST(DataTypeSerDeArrowTest, DataTypeMapNullKeySerDeTest) {
column_with_type_and_name.type, block.rows(),
"UTC");
std::cout << block.dump_data() << std::endl;
std::cout << new_block.dump_data() << std::endl;
- // new block row_index 0, 2 is should be empty
- EXPECT_EQ(new_block.dump_one_line(0, 1), "{}");
- EXPECT_EQ(new_block.dump_one_line(2, 1), "{}");
+ // in the new block, the null-key entries of rows 0 and 2 are filtered out; their other entries are kept
+ EXPECT_EQ(new_block.dump_one_line(0, 1), "{\"doris\":null, \"clever
amory\":30}");
+ EXPECT_EQ(new_block.dump_one_line(2, 1), "{\"test\":11}");
EXPECT_EQ(block.dump_data(1, 1), new_block.dump_data(1, 1));
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]