This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ce397a8d32 [FIX](map)fix arrow serde with map null key #21955
ce397a8d32 is described below

commit ce397a8d32641fe34e28968a1c560940a548f3a0
Author: amory <[email protected]>
AuthorDate: Wed Jul 19 12:09:34 2023 +0800

    [FIX](map)fix arrow serde with map null key #21955
---
 .../vec/data_types/serde/data_type_map_serde.cpp   | 19 ++++++++++--
 .../serde/data_type_serde_arrow_test.cpp           | 36 ++++++++++++++++++++--
 2 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp b/be/src/vec/data_types/serde/data_type_map_serde.cpp
index edb21a60ef..fcf67a8f53 100644
--- a/be/src/vec/data_types/serde/data_type_map_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp
@@ -67,9 +67,22 @@ void DataTypeMapSerDe::write_column_to_arrow(const IColumn& 
column, const NullMa
                              array_builder->type()->name());
         } else if (simd::contain_byte(keys_nullmap_data + offsets[r - 1],
                                       offsets[r] - offsets[r - 1], 1)) {
-            // arrow do not support key is null so we just put null with this 
row
-            checkArrowStatus(builder.AppendNull(), column.get_name(),
-                             array_builder->type()->name());
+            // Arrow does not support null map keys, so skip key-value pairs whose key is null
+            MutableColumnPtr key_mutable_data = 
nested_keys_column.clone_empty();
+            MutableColumnPtr value_mutable_data = 
nested_values_column.clone_empty();
+            for (size_t i = offsets[r - 1]; i < offsets[r]; ++i) {
+                if (keys_nullmap_data[i] == 1) {
+                    continue;
+                }
+                key_mutable_data->insert_from(nested_keys_column, i);
+                value_mutable_data->insert_from(nested_values_column, i);
+            }
+            checkArrowStatus(builder.Append(), column.get_name(), 
array_builder->type()->name());
+
+            key_serde->write_column_to_arrow(*key_mutable_data, nullptr, 
key_builder, 0,
+                                             key_mutable_data->size());
+            value_serde->write_column_to_arrow(*value_mutable_data, nullptr, 
value_builder, 0,
+                                               value_mutable_data->size());
         } else {
             checkArrowStatus(builder.Append(), column.get_name(), 
array_builder->type()->name());
             key_serde->write_column_to_arrow(nested_keys_column, nullptr, 
key_builder,
diff --git a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
index c1913e6d86..92fbcc97c0 100644
--- a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
@@ -56,6 +56,7 @@
 #include "vec/columns/column_array.h"
 #include "vec/columns/column_complex.h"
 #include "vec/columns/column_decimal.h"
+#include "vec/columns/column_map.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/columns/column_string.h"
 #include "vec/columns/column_vector.h"
@@ -76,6 +77,7 @@
 #include "vec/data_types/data_type_string.h"
 #include "vec/data_types/data_type_struct.h"
 #include "vec/data_types/data_type_time_v2.h"
+#include "vec/io/io_helper.h"
 #include "vec/runtime/vdatetime_value.h"
 #include "vec/utils/arrow_column_to_doris_column.h"
 
@@ -95,6 +97,7 @@ void serialize_and_deserialize_arrow_test() {
                 {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN, 
false},
                 {"k5", FieldType::OLAP_FIELD_TYPE_DECIMAL32, 5, 
TYPE_DECIMAL32, false},
                 {"k6", FieldType::OLAP_FIELD_TYPE_DECIMAL64, 6, 
TYPE_DECIMAL64, false},
+                {"k12", FieldType::OLAP_FIELD_TYPE_DATETIMEV2, 12, 
TYPE_DATETIMEV2, false},
         };
     } else {
         cols = {{"a", FieldType::OLAP_FIELD_TYPE_ARRAY, 6, TYPE_ARRAY, true},
@@ -327,6 +330,28 @@ void serialize_and_deserialize_arrow_test() {
                 block.insert(test_datetime);
             }
             break;
+        case TYPE_DATETIMEV2: // uint64
+            tslot.__set_slotType(type_desc.to_thrift());
+            {
+                // 2022-01-01 11:11:11.111
+                auto column_vector_datetimev2 =
+                        vectorized::ColumnVector<vectorized::UInt64>::create();
+                //                auto& datetimev2_data = 
column_vector_datetimev2->get_data();
+                DateV2Value<DateTimeV2ValueType> value;
+                string date_literal = "2022-01-01 11:11:11.111";
+                value.from_date_str(date_literal.c_str(), date_literal.size());
+                char to[64] = {};
+                std::cout << "value: " << value.to_string(to) << std::endl;
+                for (int i = 0; i < row_num; ++i) {
+                    column_vector_datetimev2->insert(value.to_date_int_val());
+                }
+                vectorized::DataTypePtr datetimev2_type(
+                        std::make_shared<vectorized::DataTypeDateTimeV2>());
+                vectorized::ColumnWithTypeAndName test_datetimev2(
+                        column_vector_datetimev2->get_ptr(), datetimev2_type, 
col_name);
+                block.insert(test_datetimev2);
+            }
+            break;
         case TYPE_ARRAY: // array
             type_desc.add_sub_type(TYPE_STRING, true);
             tslot.__set_slotType(type_desc.to_thrift());
@@ -487,6 +512,11 @@ void serialize_and_deserialize_arrow_test() {
                 }
             }
             continue;
+        } else if (std::get<3>(t) == PrimitiveType::TYPE_DATETIMEV2) {
+            // for now only the Doris -> Arrow direction is supported for datetimev2, so skip the read-back
+            block.erase(real_column_name);
+            new_block.erase(real_column_name);
+            continue;
         }
         arrow_column_to_doris_column(array, 0, 
column_with_type_and_name.column,
                                      column_with_type_and_name.type, 
block.rows(), "UTC");
@@ -579,9 +609,9 @@ TEST(DataTypeSerDeArrowTest, DataTypeMapNullKeySerDeTest) {
                                  column_with_type_and_name.type, block.rows(), 
"UTC");
     std::cout << block.dump_data() << std::endl;
     std::cout << new_block.dump_data() << std::endl;
-    // new block row_index 0, 2 is should be empty
-    EXPECT_EQ(new_block.dump_one_line(0, 1), "{}");
-    EXPECT_EQ(new_block.dump_one_line(2, 1), "{}");
+    // rows 0 and 2 contain null keys; those key-value pairs are filtered out of the new block
+    EXPECT_EQ(new_block.dump_one_line(0, 1), "{\"doris\":null, \"clever 
amory\":30}");
+    EXPECT_EQ(new_block.dump_one_line(2, 1), "{\"test\":11}");
     EXPECT_EQ(block.dump_data(1, 1), new_block.dump_data(1, 1));
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to