This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b806bce3611 [feat](map) remove duplicated keys in ColumnMap (#54068)
b806bce3611 is described below

commit b806bce361162235a8c603816da7cb6937f26339
Author: Jerry Hu <[email protected]>
AuthorDate: Thu Sep 11 14:00:36 2025 +0800

    [feat](map) remove duplicated keys in ColumnMap (#54068)
---
 be/src/vec/columns/column_map.cpp                  | 102 ++++++++-
 be/src/vec/columns/column_map.h                    |  15 ++
 be/src/vec/exec/format/orc/vorc_reader.cpp         |   6 +-
 .../exec/format/parquet/vparquet_column_reader.cpp |   1 +
 be/src/vec/exec/jni_connector.cpp                  |   5 +-
 be/src/vec/functions/cast/cast_to_map.h            |   6 +-
 be/src/vec/functions/function_map.cpp              |  12 +-
 be/src/vec/sink/vtablet_block_convertor.cpp        |   5 +-
 be/test/vec/core/column_map_test.cpp               | 250 +++++++++++++++++++++
 be/test/vec/exec/orc/orc_reader_fill_data_test.cpp |  19 +-
 .../doris/nereids/parser/LogicalPlanBuilder.java   |   9 +-
 .../expression/rules/FoldConstantRuleOnBE.java     |  12 +-
 .../trees/expressions/functions/agg/MapAgg.java    |   4 +-
 .../trees/expressions/functions/agg/MapAggV2.java  |   4 +-
 .../trees/expressions/literal/MapLiteral.java      |  69 +++---
 .../functions/ComputeSignatureHelperTest.java      |  49 ++--
 .../one_level_nestedtypes_with_s3data.out          | Bin 3604422 -> 3561305 
bytes
 .../test_nestedtypes_csv_insert_into_with_s3.out   | Bin 436103 -> 412376 bytes
 .../test_nestedtypes_json_insert_into_with_s3.out  | Bin 733470 -> 705865 bytes
 .../outfile/csv/test_outfile_csv_complex_type.out  | Bin 8605 -> 8561 bytes
 .../outfile/csv/test_outfile_csv_map_type.out      | Bin 14057 -> 13169 bytes
 .../csv/test_outfile_csv_one_nested_type.out       | Bin 9348 -> 9244 bytes
 .../orc/test_outfile_orc_one_nested_type.out       | Bin 9323 -> 9219 bytes
 .../parquet/test_outfile_parquet_complex_type.out  | Bin 8574 -> 8522 bytes
 .../parquet/test_outfile_parquet_map_type.out      | Bin 13740 -> 12974 bytes
 .../test_outfile_parquet_one_nested_type.out       | Bin 9323 -> 9219 bytes
 .../export_p0/test_outfile_orc_complex_type.out    | Bin 8549 -> 8505 bytes
 .../data/export_p0/test_outfile_orc_map_type.out   | Bin 14304 -> 13416 bytes
 .../orc/test_hive_read_orc_complex_type.out        | Bin 20515 -> 20471 bytes
 .../paimon/test_paimon_catalog.out                 | Bin 791619 -> 788923 bytes
 .../cast_function/test_cast_map_function.out       | Bin 784 -> 745 bytes
 .../nereids_function_p0/scalar_function/Map.out    | Bin 28228 -> 28192 bytes
 .../data/nereids_p0/datatype/test_map.out          | Bin 746 -> 1322 bytes
 .../cast_function/test_cast_map_function.out       | Bin 784 -> 745 bytes
 .../string_functions/test_str_to_map.out           | Bin 17405 -> 8142 bytes
 .../tvf/test_hdfs_parquet_group6.groovy            |   1 -
 .../suites/nereids_p0/datatype/test_map.groovy     |  14 +-
 37 files changed, 494 insertions(+), 89 deletions(-)

diff --git a/be/src/vec/columns/column_map.cpp 
b/be/src/vec/columns/column_map.cpp
index 079201fe0a5..416bd5628dd 100644
--- a/be/src/vec/columns/column_map.cpp
+++ b/be/src/vec/columns/column_map.cpp
@@ -20,19 +20,21 @@
 
 #include "vec/columns/column_map.h"
 
-#include <string.h>
-
 #include <algorithm>
 #include <boost/iterator/iterator_facade.hpp>
-#include <limits>
-#include <memory>
+#include <cstddef>
 #include <vector>
 
 #include "common/status.h"
 #include "pdqsort.h"
 #include "runtime/primitive_type.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_nullable.h"
 #include "vec/common/arena.h"
-#include "vec/common/typeid_cast.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/custom_allocator.h"
+#include "vec/common/hash_table/phmap_fwd_decl.h"
+#include "vec/common/string_ref.h"
 #include "vec/common/unaligned.h"
 #include "vec/core/sort_block.h"
 
@@ -505,6 +507,96 @@ MutableColumnPtr ColumnMap::permute(const Permutation& 
perm, size_t limit) const
                              assert_cast<const 
ColumnArray&>(*k_arr).get_offsets_ptr());
 }
 
+Status ColumnMap::deduplicate_keys(bool recursive) {
+    const auto inner_rows = keys_column->size();
+    const auto rows = offsets_column->size();
+
+    if (recursive) {
+        auto values_column_ = values_column;
+        if (values_column_->is_nullable()) {
+            values_column_ = 
(assert_cast<ColumnNullable&>(*values_column)).get_nested_column_ptr();
+        }
+
+        if (const auto* values_map = 
check_and_get_column<ColumnMap>(values_column_.get())) {
+            
RETURN_IF_ERROR((const_cast<ColumnMap*>(values_map))->deduplicate_keys(recursive));
+        }
+    }
+
+    DorisVector<StringRef> serialized_keys(inner_rows);
+
+    const size_t max_one_row_byte_size = keys_column->get_max_row_byte_size();
+
+    size_t total_bytes = max_one_row_byte_size * inner_rows;
+    Arena pool;
+
+    if (total_bytes >= config::pre_serialize_keys_limit_bytes) {
+        // Batch buffer would reach the byte limit; serialize keys one by one.
+        const char* begin = nullptr;
+        for (size_t i = 0; i != inner_rows; ++i) {
+            serialized_keys[i] = keys_column->serialize_value_into_arena(i, 
pool, begin);
+        }
+    } else {
+        auto* serialized_key_buffer = 
reinterpret_cast<uint8_t*>(pool.alloc(total_bytes));
+
+        for (size_t i = 0; i < inner_rows; ++i) {
+            serialized_keys[i].data =
+                    reinterpret_cast<char*>(serialized_key_buffer + (i * 
max_one_row_byte_size));
+            serialized_keys[i].size = 0;
+        }
+
+        keys_column->serialize_vec(serialized_keys.data(), inner_rows);
+    }
+
+    auto new_offsets = COffsets::create();
+    new_offsets->reserve(rows);
+    auto& new_offsets_data = new_offsets->get_data();
+
+    IColumn::Filter filter(inner_rows, 1);
+    auto& offsets = get_offsets();
+
+    Offset64 offset = 0;
+    bool has_duplicated_key = false;
+
+    for (size_t i = 0; i != rows; ++i) {
+        const auto count = offsets[i] - offsets[i - 1];
+        if (count == 0) {
+            new_offsets_data.push_back(offset);
+            continue;
+        }
+
+        if (count == 1) {
+            filter[offsets[i - 1]] = 1;
+            ++offset;
+            new_offsets_data.push_back(offset);
+            continue;
+        }
+
+        phmap::flat_hash_map<StringRef, size_t> keys_map;
+        keys_map.reserve(count);
+        for (size_t j = offsets[i - 1]; j < offsets[i]; ++j) {
+            const auto& serialized_key = serialized_keys[j];
+            if (keys_map.find(serialized_key) == keys_map.end()) {
+                ++offset;
+            } else {
+                filter[keys_map[serialized_key]] = 0;
+                has_duplicated_key = true;
+            }
+
+            filter[j] = 1;
+            keys_map[serialized_key] = j;
+        }
+        new_offsets_data.push_back(offset);
+    }
+
+    if (has_duplicated_key) {
+        offsets_column = std::move(new_offsets);
+        keys_column->filter(filter);
+        values_column->filter(filter);
+    }
+
+    return Status::OK();
+}
+
 void ColumnMap::shrink_padding_chars() {
     keys_column->shrink_padding_chars();
     values_column->shrink_padding_chars();
diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h
index 49f343095f8..1db06d80f30 100644
--- a/be/src/vec/columns/column_map.h
+++ b/be/src/vec/columns/column_map.h
@@ -193,6 +193,21 @@ public:
         return get_offsets()[i] - get_offsets()[i - 1];
     }
 
+    // Remove entries with duplicated keys from each map in the ColumnMap.
+    //
+    // For each map stored in the ColumnMap, if several entries share a key
+    // (whatever their values), only the **last** such entry is retained;
+    // earlier ones are removed, so all keys within each map are unique.
+    //
+    // Note: This function modifies the internal state of the ColumnMap 
in-place.
+    // It is intended to be used after data loading or merging steps where
+    // redundant key-value pairs may have been introduced.
+    //
+    // Example:
+    //   Input map: {"a": 1, "b": 2, "a": 3, "c": 4, null: 5, null: 6}
+    //   Result:    {"b": 2, "a": 3, "c": 4, null: 6}
+    Status deduplicate_keys(bool recursive = false);
+
     ColumnPtr convert_column_if_overflow() override {
         keys_column = keys_column->convert_column_if_overflow();
         values_column = values_column->convert_column_if_overflow();
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp 
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index e4a693a6a76..b40896bfb24 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -1780,13 +1780,15 @@ Status OrcReader::_fill_doris_data_column(const 
std::string& col_name,
         ColumnPtr& doris_value_column = doris_map.get_values_ptr();
         std::string key_col_name = col_name + ".key";
         std::string value_col_name = col_name + ".value";
+
         RETURN_IF_ERROR(_orc_column_to_doris_column<false>(
                 key_col_name, doris_key_column, doris_key_type, 
root_node->get_key_node(),
 
                 orc_key_type, orc_map->keys.get(), element_size));
-        return _orc_column_to_doris_column<false>(
+        RETURN_IF_ERROR(_orc_column_to_doris_column<false>(
                 value_col_name, doris_value_column, doris_value_type, 
root_node->get_value_node(),
-                orc_value_type, orc_map->elements.get(), element_size);
+                orc_value_type, orc_map->elements.get(), element_size));
+        return doris_map.deduplicate_keys();
     }
     case PrimitiveType::TYPE_STRUCT: {
         if (orc_column_type->getKind() != orc::TypeKind::STRUCT) {
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index 215f3ec2619..170a29b3e94 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -805,6 +805,7 @@ Status MapColumnReader::read_column_data(
     // fill offset and null map
     fill_array_offset(_field_schema, map.get_offsets(), null_map_ptr, 
_key_reader->get_rep_level(),
                       _key_reader->get_def_level());
+    RETURN_IF_ERROR(map.deduplicate_keys());
     DCHECK_EQ(key_column->size(), map.get_offsets().back());
 
     return Status::OK();
diff --git a/be/src/vec/exec/jni_connector.cpp 
b/be/src/vec/exec/jni_connector.cpp
index f6a708b0f96..b222c7368e6 100644
--- a/be/src/vec/exec/jni_connector.cpp
+++ b/be/src/vec/exec/jni_connector.cpp
@@ -453,8 +453,9 @@ Status JniConnector::_fill_map_column(TableMetaAddress& 
address, MutableColumnPt
 
     RETURN_IF_ERROR(_fill_column(address, key_column, key_type,
                                  map_offsets[origin_size + num_rows - 1] - 
start_offset));
-    return _fill_column(address, value_column, value_type,
-                        map_offsets[origin_size + num_rows - 1] - 
start_offset);
+    RETURN_IF_ERROR(_fill_column(address, value_column, value_type,
+                                 map_offsets[origin_size + num_rows - 1] - 
start_offset));
+    return map.deduplicate_keys();
 }
 
 Status JniConnector::_fill_struct_column(TableMetaAddress& address, 
MutableColumnPtr& doris_column,
diff --git a/be/src/vec/functions/cast/cast_to_map.h 
b/be/src/vec/functions/cast/cast_to_map.h
index 61697a675e5..b5adb934a5e 100644
--- a/be/src/vec/functions/cast/cast_to_map.h
+++ b/be/src/vec/functions/cast/cast_to_map.h
@@ -75,8 +75,10 @@ WrapperType create_map_wrapper(FunctionContext* context, 
const DataTypePtr& from
             converted_columns[i] = 
block.get_by_position(element_result).column;
         }
 
-        block.get_by_position(result).column = ColumnMap::create(
-                converted_columns[0], converted_columns[1], 
from_col_map->get_offsets_ptr());
+        auto map_column = ColumnMap::create(converted_columns[0], 
converted_columns[1],
+                                            from_col_map->get_offsets_ptr());
+        
static_cast<void>(assert_cast<ColumnMap&>(*map_column).deduplicate_keys());
+        block.get_by_position(result).column = std::move(map_column);
         return Status::OK();
     };
 }
diff --git a/be/src/vec/functions/function_map.cpp 
b/be/src/vec/functions/function_map.cpp
index a45fcc406b7..d2927497d82 100644
--- a/be/src/vec/functions/function_map.cpp
+++ b/be/src/vec/functions/function_map.cpp
@@ -126,6 +126,8 @@ public:
             offset += num_element / 2;
             result_col_map_offsets[row] = offset;
         }
+
+        RETURN_IF_ERROR(map_column->deduplicate_keys());
         block.replace_by_position(result, std::move(result_col));
         return Status::OK();
     }
@@ -388,9 +390,13 @@ private:
             
result_col_map_offsets->insert_value(result_col_map_keys_data->size());
         }
 
-        return ColumnMap::create(std::move(result_col_map_keys_data),
-                                 std::move(result_col_map_vals_data),
-                                 std::move(result_col_map_offsets));
+        auto map_column = 
ColumnMap::create(std::move(result_col_map_keys_data),
+                                            
std::move(result_col_map_vals_data),
+                                            std::move(result_col_map_offsets));
+
+        // `deduplicate_keys` always returns OK, so the status is ignored.
+        static_cast<void>(map_column->deduplicate_keys());
+        return map_column;
     }
 
     static std::vector<std::string_view> split_pair_by_delim(const 
std::string_view& str,
diff --git a/be/src/vec/sink/vtablet_block_convertor.cpp 
b/be/src/vec/sink/vtablet_block_convertor.cpp
index bc790419c00..a7f64ce51c8 100644
--- a/be/src/vec/sink/vtablet_block_convertor.cpp
+++ b/be/src/vec/sink/vtablet_block_convertor.cpp
@@ -399,7 +399,9 @@ Status OlapTableBlockConvertor::_internal_validate_column(
         break;
     }
     case TYPE_MAP: {
-        const auto column_map = assert_cast<const 
vectorized::ColumnMap*>(real_column_ptr.get());
+        const auto* column_map = assert_cast<const 
vectorized::ColumnMap*>(real_column_ptr.get());
+        
RETURN_IF_ERROR((const_cast<ColumnMap*>(column_map))->deduplicate_keys(true));
+
         const auto* type_map =
                 assert_cast<const 
vectorized::DataTypeMap*>(remove_nullable(type).get());
         auto key_type = type_map->get_key_type();
@@ -411,6 +413,7 @@ Status OlapTableBlockConvertor::_internal_validate_column(
                 permutation[c] = rows ? (*rows)[r] : r;
             }
         }
+
         fmt::format_to(error_prefix, "MAP type failed: ");
         RETURN_IF_ERROR(_validate_column(state, key_type, 
column_map->get_keys_ptr(), slot_index,
                                          error_prefix, permutation.size(), 
&permutation));
diff --git a/be/test/vec/core/column_map_test.cpp 
b/be/test/vec/core/column_map_test.cpp
index 576b991ebe4..37baee03c9e 100644
--- a/be/test/vec/core/column_map_test.cpp
+++ b/be/test/vec/core/column_map_test.cpp
@@ -17,15 +17,21 @@
 
 #include "vec/columns/column_map.h"
 
+#include <gtest/gtest-death-test.h>
 #include <gtest/gtest-message.h>
 #include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
+
+#include <cstdint>
 
 #include "gtest/gtest_pred_impl.h"
+#include "runtime/define_primitive_type.h"
 #include "runtime/primitive_type.h"
 #include "vec/columns/column.h"
 #include "vec/columns/column_string.h"
 #include "vec/columns/column_vector.h"
 #include "vec/core/field.h"
+#include "vec/data_types/data_type_nullable.h"
 
 namespace doris::vectorized {
 TEST(ColumnMapTest2, StringKeyTest) {
@@ -97,6 +103,250 @@ TEST(ColumnMapTest2, StringKeyTest) {
     }
 };
 
+TEST(ColumnMapTest2, StringKeyTestDuplicatedKeys) {
+    auto col_map_str = ColumnMap(
+            ColumnNullable::create(ColumnString::create(), 
ColumnVector<TYPE_BOOLEAN>::create()),
+            ColumnInt32::create(), ColumnArray::ColumnOffsets::create());
+    Array k1 = {Field::create_field<TYPE_STRING>("a"), 
Field::create_field<TYPE_STRING>("b"),
+                Field::create_field<TYPE_STRING>("c"), 
Field::create_field<TYPE_STRING>("a"),
+                Field::create_field<TYPE_STRING>("b"), 
Field::create_field<TYPE_STRING>("c")};
+    Array v1 = {Field::create_field<TYPE_INT>(1), 
Field::create_field<TYPE_INT>(2),
+                Field::create_field<TYPE_INT>(3), 
Field::create_field<TYPE_INT>(4),
+                Field::create_field<TYPE_INT>(5), 
Field::create_field<TYPE_INT>(6)};
+    {
+        Map map;
+        map.push_back(Field::create_field<TYPE_ARRAY>(k1));
+        map.push_back(Field::create_field<TYPE_ARRAY>(v1));
+        col_map_str.insert(Field::create_field<TYPE_MAP>(map));
+    }
+    {
+        Map map;
+        map.push_back(Field::create_field<TYPE_ARRAY>(k1));
+        map.push_back(Field::create_field<TYPE_ARRAY>(v1));
+        col_map_str.insert(Field::create_field<TYPE_MAP>(map));
+    }
+
+    Array k2 = {Field::create_field<TYPE_STRING>("aa"), 
Field::create_field<TYPE_STRING>("bb"),
+                Field::create_field<TYPE_STRING>("cc"), 
Field::create_field<TYPE_STRING>("aa"),
+                Field::create_field<TYPE_STRING>("cc")};
+    Array v2 = {Field::create_field<TYPE_INT>(11), 
Field::create_field<TYPE_INT>(22),
+                Field::create_field<TYPE_INT>(33), 
Field::create_field<TYPE_INT>(111),
+                Field::create_field<TYPE_INT>(333)};
+    {
+        Map map;
+        map.push_back(Field::create_field<TYPE_ARRAY>(k2));
+        map.push_back(Field::create_field<TYPE_ARRAY>(v2));
+        col_map_str.insert(Field::create_field<TYPE_MAP>(map));
+    }
+
+    Array k3 = {Field::create_field<TYPE_STRING>("aaa"),
+                Field::create_field<TYPE_STRING>("bbb"),
+                Field(),
+                Field::create_field<TYPE_STRING>(""),
+                Field::create_field<TYPE_STRING>("ccc"),
+                Field::create_field<TYPE_STRING>("ccc"),
+                Field::create_field<TYPE_STRING>(""),
+                Field()};
+    Array v3 = {Field::create_field<TYPE_INT>(111),  
Field::create_field<TYPE_INT>(222),
+                Field::create_field<TYPE_INT>(4321), 
Field::create_field<TYPE_INT>(999),
+                Field::create_field<TYPE_INT>(333),  
Field::create_field<TYPE_INT>(3333),
+                Field::create_field<TYPE_INT>(9988), 
Field::create_field<TYPE_INT>(1234)};
+    {
+        Map map;
+        map.push_back(Field::create_field<TYPE_ARRAY>(k3));
+        map.push_back(Field::create_field<TYPE_ARRAY>(v3));
+        col_map_str.insert(Field::create_field<TYPE_MAP>(map));
+    }
+
+    ASSERT_EQ(col_map_str.size(), 4);
+    auto& keys = col_map_str.get_keys();
+    auto& values = col_map_str.get_values();
+
+    ASSERT_EQ(keys.size(), 25);
+    ASSERT_EQ(keys.size(), values.size());
+
+    auto st = col_map_str.deduplicate_keys();
+    ASSERT_TRUE(st.ok()) << st.to_string();
+
+    ASSERT_EQ(keys.size(), 14);
+    ASSERT_EQ(keys.size(), values.size());
+
+    auto& offsets = col_map_str.get_offsets();
+
+    auto& nullable_keys = assert_cast<ColumnNullable&>(keys);
+    auto& string_keys = 
assert_cast<ColumnString&>(nullable_keys.get_nested_column());
+    auto& int_values = assert_cast<ColumnInt32&>(values);
+
+    ASSERT_EQ(offsets.size(), 4);
+    ASSERT_EQ(offsets[0], 3);
+    ASSERT_EQ(offsets[1], 6);
+    ASSERT_EQ(offsets[2], 9);
+    ASSERT_EQ(offsets[3], 14);
+
+    ASSERT_EQ(string_keys.get_element(0), "a");
+    ASSERT_EQ(string_keys.get_element(1), "b");
+    ASSERT_EQ(string_keys.get_element(2), "c");
+
+    ASSERT_EQ(string_keys.get_element(3), "a");
+    ASSERT_EQ(string_keys.get_element(4), "b");
+    ASSERT_EQ(string_keys.get_element(5), "c");
+
+    ASSERT_EQ(string_keys.get_element(6), "bb");
+    ASSERT_EQ(string_keys.get_element(7), "aa");
+    ASSERT_EQ(string_keys.get_element(8), "cc");
+
+    ASSERT_EQ(string_keys.get_element(9), "aaa");
+    ASSERT_EQ(string_keys.get_element(10), "bbb");
+    ASSERT_EQ(string_keys.get_element(11), "ccc");
+    ASSERT_EQ(string_keys.get_element(12), "");
+    ASSERT_TRUE(nullable_keys.is_null_at(13));
+
+    ASSERT_EQ(int_values.get_element(0), 4);
+    ASSERT_EQ(int_values.get_element(1), 5);
+    ASSERT_EQ(int_values.get_element(2), 6);
+
+    ASSERT_EQ(int_values.get_element(3), 4);
+    ASSERT_EQ(int_values.get_element(4), 5);
+    ASSERT_EQ(int_values.get_element(5), 6);
+
+    ASSERT_EQ(int_values.get_element(6), 22);
+    ASSERT_EQ(int_values.get_element(7), 111);
+    ASSERT_EQ(int_values.get_element(8), 333);
+
+    ASSERT_EQ(int_values.get_element(9), 111);
+    ASSERT_EQ(int_values.get_element(10), 222);
+    ASSERT_EQ(int_values.get_element(11), 3333);
+    ASSERT_EQ(int_values.get_element(12), 9988);
+    ASSERT_EQ(int_values.get_element(13), 1234);
+};
+
+TEST(ColumnMapTest2, StringKeyTestDuplicatedKeysNestedMap) {
+    auto col_map_str = ColumnMap(ColumnString::create(),
+                                 ColumnMap::create(ColumnString::create(), 
ColumnInt32::create(),
+                                                   
ColumnArray::ColumnOffsets::create()),
+                                 ColumnArray::ColumnOffsets::create());
+
+    Map inner_map;
+    {
+        Array k1 = {Field::create_field<TYPE_STRING>("a"), 
Field::create_field<TYPE_STRING>("b"),
+                    Field::create_field<TYPE_STRING>("c"), 
Field::create_field<TYPE_STRING>("a"),
+                    Field::create_field<TYPE_STRING>("b"), 
Field::create_field<TYPE_STRING>("c")};
+        Array v1 = {Field::create_field<TYPE_INT>(1), 
Field::create_field<TYPE_INT>(2),
+                    Field::create_field<TYPE_INT>(3), 
Field::create_field<TYPE_INT>(4),
+                    Field::create_field<TYPE_INT>(5), 
Field::create_field<TYPE_INT>(6)};
+        inner_map.push_back(Field::create_field<TYPE_ARRAY>(k1));
+        inner_map.push_back(Field::create_field<TYPE_ARRAY>(v1));
+    }
+
+    Map inner_map2;
+    {
+        Array k1 = {Field::create_field<TYPE_STRING>("a"), 
Field::create_field<TYPE_STRING>("b"),
+                    Field::create_field<TYPE_STRING>("c"), 
Field::create_field<TYPE_STRING>("a"),
+                    Field::create_field<TYPE_STRING>("b"), 
Field::create_field<TYPE_STRING>("c")};
+        Array v1 = {Field::create_field<TYPE_INT>(1), 
Field::create_field<TYPE_INT>(2),
+                    Field::create_field<TYPE_INT>(3), 
Field::create_field<TYPE_INT>(4),
+                    Field::create_field<TYPE_INT>(5), 
Field::create_field<TYPE_INT>(6)};
+        inner_map2.push_back(Field::create_field<TYPE_ARRAY>(k1));
+        inner_map2.push_back(Field::create_field<TYPE_ARRAY>(v1));
+    }
+
+    Array k1 = {Field::create_field<TYPE_STRING>("a"), 
Field::create_field<TYPE_STRING>("a")};
+    Array v1 = {Field::create_field<TYPE_MAP>(inner_map),
+                Field::create_field<TYPE_MAP>(inner_map2)};
+    {
+        Map map;
+        map.push_back(Field::create_field<TYPE_ARRAY>(k1));
+        map.push_back(Field::create_field<TYPE_ARRAY>(v1));
+        col_map_str.insert(Field::create_field<TYPE_MAP>(map));
+    }
+
+    Map inner_map3;
+    {
+        Array k2 = {Field::create_field<TYPE_STRING>("aa"), 
Field::create_field<TYPE_STRING>("bb"),
+                    Field::create_field<TYPE_STRING>("cc"), 
Field::create_field<TYPE_STRING>("aa"),
+                    Field::create_field<TYPE_STRING>("cc")};
+        Array v2 = {Field::create_field<TYPE_INT>(11), 
Field::create_field<TYPE_INT>(22),
+                    Field::create_field<TYPE_INT>(33), 
Field::create_field<TYPE_INT>(111),
+                    Field::create_field<TYPE_INT>(333)};
+        inner_map3.push_back(Field::create_field<TYPE_ARRAY>(k2));
+        inner_map3.push_back(Field::create_field<TYPE_ARRAY>(v2));
+    }
+
+    Map inner_map4;
+    {
+        Array k2 = {Field::create_field<TYPE_STRING>("aa"), 
Field::create_field<TYPE_STRING>("cc"),
+                    Field::create_field<TYPE_STRING>("cc")};
+        Array v2 = {Field::create_field<TYPE_INT>(11), 
Field::create_field<TYPE_INT>(33),
+                    Field::create_field<TYPE_INT>(333)};
+        inner_map4.push_back(Field::create_field<TYPE_ARRAY>(k2));
+        inner_map4.push_back(Field::create_field<TYPE_ARRAY>(v2));
+    }
+
+    Array k2 = {Field::create_field<TYPE_STRING>("aa"), 
Field::create_field<TYPE_STRING>("aa")};
+    Array v2 = {Field::create_field<TYPE_MAP>(inner_map3),
+                Field::create_field<TYPE_MAP>(inner_map4)};
+    {
+        Map map;
+        map.push_back(Field::create_field<TYPE_ARRAY>(k2));
+        map.push_back(Field::create_field<TYPE_ARRAY>(v2));
+        col_map_str.insert(Field::create_field<TYPE_MAP>(map));
+    }
+
+    ASSERT_EQ(col_map_str.size(), 2);
+    auto& keys = col_map_str.get_keys();
+    auto& values = col_map_str.get_values();
+
+    ASSERT_EQ(keys.size(), 4);
+    ASSERT_EQ(keys.size(), values.size());
+
+    auto st = col_map_str.deduplicate_keys(true);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+
+    ASSERT_EQ(keys.size(), 2);
+    ASSERT_EQ(keys.size(), values.size());
+
+    auto& offsets = col_map_str.get_offsets();
+    auto& string_keys = assert_cast<ColumnString&>(keys);
+    auto& map_values = assert_cast<ColumnMap&>(values);
+
+    ASSERT_EQ(offsets.size(), 2);
+    ASSERT_EQ(offsets[0], 1);
+    ASSERT_EQ(offsets[1], 2);
+
+    ASSERT_EQ(string_keys.get_element(0), "a");
+    ASSERT_EQ(string_keys.get_element(1), "aa");
+
+    auto map_value1 = get<Array>(map_values[0]);
+    auto map_value2 = get<Array>(map_values[1]);
+
+    ASSERT_EQ(map_value1.size(), 2);
+    ASSERT_EQ(map_value2.size(), 2);
+
+    // keys
+    auto v1_keys = get<Array>(map_value1[0]);
+    ASSERT_EQ(v1_keys.size(), 3);
+    ASSERT_EQ(get<std::string>(v1_keys[0]), "a");
+    ASSERT_EQ(get<std::string>(v1_keys[1]), "b");
+    ASSERT_EQ(get<std::string>(v1_keys[2]), "c");
+
+    auto v2_keys = get<Array>(map_value2[0]);
+    ASSERT_EQ(v2_keys.size(), 2);
+    ASSERT_EQ(get<std::string>(v2_keys[0]), "aa");
+    ASSERT_EQ(get<std::string>(v2_keys[1]), "cc");
+
+    // values
+    auto v1_values = get<Array>(map_value1[1]);
+    ASSERT_EQ(v1_values.size(), 3);
+    ASSERT_EQ(get<int32_t>(v1_values[0]), 4);
+    ASSERT_EQ(get<int32_t>(v1_values[1]), 5);
+    ASSERT_EQ(get<int32_t>(v1_values[2]), 6);
+
+    auto v2_values = get<Array>(map_value2[1]);
+    ASSERT_EQ(v2_values.size(), 2);
+    ASSERT_EQ(get<int32_t>(v2_values[0]), 11);
+    ASSERT_EQ(get<int32_t>(v2_values[1]), 333);
+};
+
 TEST(ColumnMapTest2, StringValueTest) {
     auto col_map_str64 = ColumnMap(ColumnInt64::create(), 
ColumnString64::create(),
                                    ColumnArray::ColumnOffsets::create());
diff --git a/be/test/vec/exec/orc/orc_reader_fill_data_test.cpp 
b/be/test/vec/exec/orc/orc_reader_fill_data_test.cpp
index 2f77cefc889..1de4b2f2d5d 100644
--- a/be/test/vec/exec/orc/orc_reader_fill_data_test.cpp
+++ b/be/test/vec/exec/orc/orc_reader_fill_data_test.cpp
@@ -463,11 +463,20 @@ TEST_F(OrcReaderFillDataTest, ComplexTypeConversionTest) {
                 {doris_column->get_ptr(), doris_struct_type, "cc"}}};
         std::cout << block.dump_data() << "\n";
         ASSERT_EQ(block.dump_data(),
-                  "+-------------------+\n|cc(Map(INT, 
FLOAT))|\n+-------------------+\n|          "
-                  "       {}|\n|                 {}|\n|            {200:6}|\n| 
           "
-                  "{300:9}|\n|   {400:12, 400:12}|\n|   {500:15, 
500:15}|\n|{600:18, "
-                  "600:18,...|\n|{700:21, 700:21,...|\n|{800:24, 
800:24,...|\n|{900:27, "
-                  "900:27,...|\n+-------------------+\n");
+                  "+-------------------+\n"
+                  "|cc(Map(INT, FLOAT))|\n"
+                  "+-------------------+\n"
+                  "|                 {}|\n"
+                  "|                 {}|\n"
+                  "|            {200:6}|\n"
+                  "|            {300:9}|\n"
+                  "|           {400:12}|\n"
+                  "|           {500:15}|\n"
+                  "|           {600:18}|\n"
+                  "|           {700:21}|\n"
+                  "|           {800:24}|\n"
+                  "|           {900:27}|\n"
+                  "+-------------------+\n");
     }
 }
 } // namespace vectorized
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index abb468a19cc..a80fcf90ab9 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -1060,12 +1060,14 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.Supplier;
 import java.util.stream.Collectors;
 
@@ -3397,7 +3399,12 @@ public class LogicalPlanBuilder extends 
DorisParserBaseVisitor<Object> {
                 values.add(items.get(i));
             }
         }
-        return new MapLiteral(typeCoercionItems(keys), 
typeCoercionItems(values));
+        List<Literal> castKeys = typeCoercionItems(keys);
+        List<Literal> castValues = typeCoercionItems(values);
+        Map<Literal, Literal> map = new LinkedHashMap<>();
+        AtomicInteger pos = new AtomicInteger(0);
+        castKeys.forEach(k -> map.put(k, 
castValues.get(pos.getAndIncrement())));
+        return new MapLiteral(map);
     }
 
     @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
index 20b57bce6c1..b09bca061cf 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
@@ -107,11 +107,13 @@ import java.time.LocalDateTime;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
 
 /**
  * Constant evaluation of an expression.
@@ -560,10 +562,14 @@ public class FoldConstantRuleOnBE implements 
ExpressionPatternRuleFactory {
             }
             int offsetCount = resultContent.getChildOffsetCount();
             if (offsetCount == 1) {
-                MapLiteral mapLiteral = new MapLiteral(allKeys, allValues, 
mapType);
+                Map<Literal, Literal> map = new LinkedHashMap<>();
+                AtomicInteger pos = new AtomicInteger(0);
+                allKeys.forEach(k -> map.put(k, 
allValues.get(pos.getAndIncrement())));
+                MapLiteral mapLiteral = new MapLiteral(map, mapType);
                 res.add(mapLiteral);
             } else {
                 for (int i = 0; i < offsetCount; ++i) {
+                    Map<Literal, Literal> map = new LinkedHashMap<>();
                     List<Literal> keyLiteral = new ArrayList<>();
                     List<Literal> valueLiteral = new ArrayList<>();
                     int startOffset = (int) ((i == 0) ? 0 : 
resultContent.getChildOffset(i - 1));
@@ -572,7 +578,9 @@ public class FoldConstantRuleOnBE implements 
ExpressionPatternRuleFactory {
                         keyLiteral.add(allKeys.get(off));
                         valueLiteral.add(allValues.get(off));
                     }
-                    MapLiteral mapLiteral = new MapLiteral(keyLiteral, valueLiteral, mapType);
+                    AtomicInteger pos = new AtomicInteger(0);
+                    keyLiteral.forEach(k -> map.put(k, valueLiteral.get(pos.getAndIncrement())));
+                    MapLiteral mapLiteral = new MapLiteral(map, mapType);
                     res.add(mapLiteral);
                 }
             }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MapAgg.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MapAgg.java
index 7ade67a78f4..8f8652395f7 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MapAgg.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MapAgg.java
@@ -29,7 +29,7 @@ import 
org.apache.doris.nereids.types.coercion.FollowToAnyDataType;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 
-import java.util.ArrayList;
+import java.util.LinkedHashMap;
 import java.util.List;
 
 /**
@@ -78,6 +78,6 @@ public class MapAgg extends NotNullableAggregateFunction
 
     @Override
     public Expression resultForEmptyInput() {
-        return new MapLiteral(new ArrayList<>(), new ArrayList<>(), 
this.getDataType());
+        return new MapLiteral(new LinkedHashMap<>(), this.getDataType());
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MapAggV2.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MapAggV2.java
index 611ba7726b7..652f21f97ba 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MapAggV2.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MapAggV2.java
@@ -29,7 +29,7 @@ import 
org.apache.doris.nereids.types.coercion.FollowToAnyDataType;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 
-import java.util.ArrayList;
+import java.util.LinkedHashMap;
 import java.util.List;
 
 /**
@@ -78,6 +78,6 @@ public class MapAggV2 extends NotNullableAggregateFunction
 
     @Override
     public Expression resultForEmptyInput() {
-        return new MapLiteral(new ArrayList<>(), new ArrayList<>(), 
this.getDataType());
+        return new MapLiteral(new LinkedHashMap<>(), this.getDataType());
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/MapLiteral.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/MapLiteral.java
index d2dc231db2d..d408cf22803 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/MapLiteral.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/MapLiteral.java
@@ -26,44 +26,41 @@ import org.apache.doris.nereids.types.MapType;
 import org.apache.doris.nereids.types.NullType;
 
 import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
 
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Objects;
 import java.util.stream.Collectors;
 
 /** MapLiteral */
 public class MapLiteral extends Literal {
 
-    private final List<Literal> keys;
-    private final List<Literal> values;
+    private final Map<Literal, Literal> map;
 
     public MapLiteral() {
         super(MapType.SYSTEM_DEFAULT);
-        this.keys = ImmutableList.of();
-        this.values = ImmutableList.of();
+        this.map = ImmutableMap.of();
     }
 
-    public MapLiteral(List<Literal> keys, List<Literal> values) {
-        this(keys, values, computeDataType(keys, values));
+    public MapLiteral(Map<Literal, Literal> map) {
+        this(map, computeDataType(map));
     }
 
     /**
      * create MAP Literal with keys, values and datatype
      */
-    public MapLiteral(List<Literal> keys, List<Literal> values, DataType 
dataType) {
+    public MapLiteral(Map<Literal, Literal> map, DataType dataType) {
         super(dataType);
-        this.keys = ImmutableList.copyOf(Objects.requireNonNull(keys, "keys 
should not be null"));
-        this.values = ImmutableList.copyOf(Objects.requireNonNull(values, 
"values should not be null"));
+        this.map = ImmutableMap.copyOf(Objects.requireNonNull(map, "Map should 
not be null"));
         Preconditions.checkArgument(dataType instanceof MapType,
                 "dataType should be MapType, but we meet %s", dataType);
-        Preconditions.checkArgument(keys.size() == values.size(),
-                "key size %s is not equal to value size %s", keys.size(), 
values.size());
     }
 
     @Override
-    public List<List<Literal>> getValue() {
-        return ImmutableList.of(keys, values);
+    public Map<Literal, Literal> getValue() {
+        return map;
     }
 
     @Override
@@ -73,14 +70,13 @@ public class MapLiteral extends Literal {
         } else if (targetType instanceof MapType) {
             // we should pass dataType to constructor because arguments maybe 
empty
             return new MapLiteral(
-                    keys.stream()
-                            .map(k -> k.uncheckedCastWithFallback(((MapType) 
targetType).getKeyType()))
-                            .map(Literal.class::cast)
-                            .collect(ImmutableList.toImmutableList()),
-                    values.stream()
-                            .map(v -> v.uncheckedCastWithFallback(((MapType) 
targetType).getValueType()))
-                            .map(Literal.class::cast)
-                            .collect(ImmutableList.toImmutableList()),
+                    map.entrySet().stream()
+                            .collect(ImmutableMap.toImmutableMap(
+                                    entry -> (Literal) 
entry.getKey().uncheckedCastWithFallback(((MapType) targetType)
+                                            .getKeyType()),
+                                    entry -> (Literal) entry.getValue()
+                                            
.uncheckedCastWithFallback(((MapType) targetType).getValueType())
+                            )),
                     targetType
             );
         } else {
@@ -90,10 +86,10 @@ public class MapLiteral extends Literal {
 
     @Override
     public LiteralExpr toLegacyLiteral() {
-        List<LiteralExpr> keyExprs = keys.stream()
+        List<LiteralExpr> keyExprs = map.keySet().stream()
                 .map(Literal::toLegacyLiteral)
                 .collect(Collectors.toList());
-        List<LiteralExpr> valueExprs = values.stream()
+        List<LiteralExpr> valueExprs = map.values().stream()
                 .map(Literal::toLegacyLiteral)
                 .collect(Collectors.toList());
         return new 
org.apache.doris.analysis.MapLiteral(getDataType().toCatalogDataType(), 
keyExprs, valueExprs);
@@ -103,11 +99,8 @@ public class MapLiteral extends Literal {
     public String toString() {
         StringBuilder sb = new StringBuilder();
         sb.append("map(");
-        if (!keys.isEmpty()) {
-            sb.append(keys.get(0).toString()).append(", ").append(values.get(0).toString());
-        }
-        for (int i = 1; i < keys.size(); i++) {
-            sb.append(", ").append(keys.get(i).toString()).append(",").append(values.get(i).toString());
+        for (Entry<Literal, Literal> entry : map.entrySet()) {
+            sb.append(entry.getKey().toString()).append(", ").append(entry.getValue().toString());
         }
         sb.append(")");
         return sb.toString();
@@ -117,11 +110,8 @@ public class MapLiteral extends Literal {
     public String computeToSql() {
         StringBuilder sb = new StringBuilder();
         sb.append("map(");
-        if (!keys.isEmpty()) {
-            sb.append(keys.get(0).toSql()).append(", ").append(values.get(0).toSql());
-        }
-        for (int i = 1; i < keys.size(); i++) {
-            sb.append(", ").append(keys.get(i).toSql()).append(",").append(values.get(i).toSql());
+        for (Entry<Literal, Literal> entry : map.entrySet()) {
+            sb.append(entry.getKey().toString()).append(", ").append(entry.getValue().toString());
         }
         sb.append(")");
         return sb.toString();
@@ -132,14 +122,13 @@ public class MapLiteral extends Literal {
         return visitor.visitMapLiteral(this, context);
     }
 
-    private static DataType computeDataType(List<Literal> keys, List<Literal> 
values) {
+    private static DataType computeDataType(Map<Literal, Literal> map) {
         DataType keyType = NullType.INSTANCE;
         DataType valueType = NullType.INSTANCE;
-        if (!keys.isEmpty()) {
-            keyType = keys.get(0).dataType;
-        }
-        if (!values.isEmpty()) {
-            valueType = values.get(0).dataType;
+        if (!map.isEmpty()) {
+            Map.Entry<Literal, Literal> firstEntry = 
map.entrySet().iterator().next();
+            keyType = firstEntry.getKey().dataType;
+            valueType = firstEntry.getValue().dataType;
         }
         return MapType.of(keyType, valueType);
     }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java
index 2dc2447c97d..ce9d83362fa 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java
@@ -26,6 +26,7 @@ import 
org.apache.doris.nereids.trees.expressions.literal.DateTimeV2Literal;
 import org.apache.doris.nereids.trees.expressions.literal.DecimalV3Literal;
 import org.apache.doris.nereids.trees.expressions.literal.DoubleLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.Literal;
 import org.apache.doris.nereids.trees.expressions.literal.MapLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.NullLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.SmallIntLiteral;
@@ -49,13 +50,16 @@ import org.apache.doris.nereids.types.coercion.AnyDataType;
 import org.apache.doris.nereids.types.coercion.FollowToAnyDataType;
 import org.apache.doris.nereids.types.coercion.FollowToArgumentType;
 
+import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 
 import java.math.BigDecimal;
 import java.util.Collections;
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 
 public class ComputeSignatureHelperTest {
@@ -137,8 +141,9 @@ public class ComputeSignatureHelperTest {
     void testMapImplementAnyDataTypeWithOutIndex() {
         FunctionSignature signature = 
FunctionSignature.ret(IntegerType.INSTANCE)
                 .args(MapType.of(AnyDataType.INSTANCE_WITHOUT_INDEX, 
AnyDataType.INSTANCE_WITHOUT_INDEX));
-        List<Expression> arguments = Lists.newArrayList(new 
MapLiteral(Lists.newArrayList(new IntegerLiteral(0)),
-                Lists.newArrayList(new BigIntLiteral(0))));
+        Map<Literal, Literal> map = Maps.newLinkedHashMap();
+        map.put(new IntegerLiteral(0), new BigIntLiteral(0));
+        List<Expression> arguments = Lists.newArrayList(new MapLiteral(map));
         signature = 
ComputeSignatureHelper.implementAnyDataTypeWithOutIndex(signature, arguments);
         Assertions.assertTrue(signature.getArgType(0) instanceof MapType);
         Assertions.assertTrue(((MapType) signature.getArgType(0)).getKeyType() 
instanceof IntegerType);
@@ -202,8 +207,10 @@ public class ComputeSignatureHelperTest {
         FunctionSignature signature = 
FunctionSignature.ret(IntegerType.INSTANCE)
                 .args(MapType.of(new AnyDataType(0), new AnyDataType(1)),
                         new AnyDataType(0), new AnyDataType(1));
+        Map<Literal, Literal> map = Maps.newLinkedHashMap();
+        map.put(new IntegerLiteral(0), new BigIntLiteral(0));
         List<Expression> arguments = Lists.newArrayList(
-                new MapLiteral(Lists.newArrayList(new IntegerLiteral(0)), 
Lists.newArrayList(new BigIntLiteral(0))),
+                new MapLiteral(map),
                 new BigIntLiteral(0), new IntegerLiteral(0));
         signature = 
ComputeSignatureHelper.implementAnyDataTypeWithIndex(signature, arguments);
         Assertions.assertTrue(signature.getArgType(0) instanceof MapType);
@@ -236,10 +243,11 @@ public class ComputeSignatureHelperTest {
                 .args(MapType.of(new AnyDataType(0), new AnyDataType(1)),
                         new AnyDataType(0), new AnyDataType(1),
                         MapType.of(new FollowToAnyDataType(0), new 
FollowToAnyDataType(1)));
+        Map<Literal, Literal> map = Maps.newLinkedHashMap();
+        map.put(new BigIntLiteral(0), new IntegerLiteral(0));
         List<Expression> arguments = Lists.newArrayList(
                 new NullLiteral(), new NullLiteral(), new NullLiteral(),
-                new MapLiteral(Lists.newArrayList(new BigIntLiteral(0)),
-                        Lists.newArrayList(new IntegerLiteral(0))));
+                new MapLiteral(map));
         signature = 
ComputeSignatureHelper.implementAnyDataTypeWithIndex(signature, arguments);
         Assertions.assertTrue(signature.getArgType(0) instanceof MapType);
         Assertions.assertTrue(((MapType) signature.getArgType(0)).getKeyType() 
instanceof BigIntType);
@@ -275,9 +283,9 @@ public class ComputeSignatureHelperTest {
                         new AnyDataType(0), new AnyDataType(1),
                         MapType.of(new FollowToAnyDataType(0), new 
FollowToAnyDataType(1)));
         List<Expression> arguments = Lists.newArrayList(
-                new MapLiteral(Lists.newArrayList(new IntegerLiteral(0)), 
Lists.newArrayList(new BigIntLiteral(0))),
+                new MapLiteral(ImmutableMap.of(new IntegerLiteral(0), new 
BigIntLiteral(0))),
                 new BigIntLiteral(0), new IntegerLiteral(0),
-                new MapLiteral(Lists.newArrayList(new IntegerLiteral(0)), 
Lists.newArrayList(new BigIntLiteral(0))));
+                new MapLiteral(ImmutableMap.of(new IntegerLiteral(0), new 
BigIntLiteral(0))));
         signature = 
ComputeSignatureHelper.implementAnyDataTypeWithIndex(signature, arguments);
         Assertions.assertTrue(signature.getArgType(0) instanceof MapType);
         Assertions.assertTrue(((MapType) signature.getArgType(0)).getKeyType() 
instanceof BigIntType);
@@ -338,8 +346,8 @@ public class ComputeSignatureHelperTest {
                         MapType.of(DecimalV3Type.WILDCARD, 
DecimalV3Type.WILDCARD),
                         DecimalV3Type.WILDCARD);
         List<Expression> arguments = Lists.newArrayList(
-                new MapLiteral(Lists.newArrayList(new DecimalV3Literal(new 
BigDecimal("1.1234"))),
-                        Lists.newArrayList(new DecimalV3Literal(new 
BigDecimal("12.12345")))),
+                new MapLiteral(ImmutableMap.of(new DecimalV3Literal(new 
BigDecimal("1.1234")),
+                        new DecimalV3Literal(new BigDecimal("12.12345")))),
                 new NullLiteral(),
                 new DecimalV3Literal(new BigDecimal("123.123")));
         signature = ComputeSignatureHelper.computePrecision(new 
FakeComputeSignature(), signature, arguments);
@@ -392,8 +400,8 @@ public class ComputeSignatureHelperTest {
                         MapType.of(DateTimeV2Type.SYSTEM_DEFAULT, 
DateTimeV2Type.SYSTEM_DEFAULT),
                         DateTimeV2Type.SYSTEM_DEFAULT);
         List<Expression> arguments = Lists.newArrayList(
-                new MapLiteral(Lists.newArrayList(new 
DateTimeV2Literal("2020-02-02 00:00:00.123")),
-                        Lists.newArrayList(new DateTimeV2Literal("2020-02-02 
00:00:00.12"))),
+                new MapLiteral(ImmutableMap.of(new 
DateTimeV2Literal("2020-02-02 00:00:00.123"),
+                        new DateTimeV2Literal("2020-02-02 00:00:00.12"))),
                 new NullLiteral(),
                 new DateTimeV2Literal("2020-02-02 00:00:00.1234"));
         signature = ComputeSignatureHelper.computePrecision(new 
FakeComputeSignature(), signature, arguments);
@@ -446,11 +454,11 @@ public class ComputeSignatureHelperTest {
         FunctionSignature signature = 
FunctionSignature.ret(ArrayType.of(TimeV2Type.INSTANCE)).args(
                         ArrayType.of(TimeV2Type.INSTANCE),
                         MapType.of(IntegerType.INSTANCE, TimeV2Type.INSTANCE), 
TimeV2Type.INSTANCE);
+        Map<Literal, Literal> map = Maps.newLinkedHashMap();
+        map.put(new IntegerLiteral(1), new TimeV2Literal("12:34:56.1234"));
         List<Expression> arguments = Lists.newArrayList(
                         new ArrayLiteral(Lists.newArrayList(new 
TimeV2Literal("12:34:56.12"))),
-                        new MapLiteral(Lists.newArrayList(new 
IntegerLiteral(1)),
-                                        Lists.newArrayList(new 
TimeV2Literal("12:34:56.1234"))),
-                        new TimeV2Literal("12:34:56.123"));
+                        new MapLiteral(map), new 
TimeV2Literal("12:34:56.123"));
         signature = ComputeSignatureHelper.computePrecision(new 
FakeComputeSignature(), signature, arguments);
 
         // Check array argument (precision should be 4 from the map value)
@@ -480,16 +488,17 @@ public class ComputeSignatureHelperTest {
                                                         
DateTimeV2Type.SYSTEM_DEFAULT)),
                                         DateTimeV2Type.SYSTEM_DEFAULT);
 
+        Map<Literal, Literal> map = Maps.newLinkedHashMap();
+        map.put(new DateTimeV2Literal("2020-02-02 00:00:00.12"),
+                        new ArrayLiteral(Lists.newArrayList(new 
TimeV2Literal("12:34:56.1"))));
+        Map<Literal, Literal> map2 = Maps.newLinkedHashMap();
+        map2.put(new TimeV2Literal("12:34:56.123"), new 
DateTimeV2Literal("2020-02-02 00:00:00"));
         // Create complex arguments with different precisions
         List<Expression> arguments = Lists.newArrayList(
                         // Map(DateTimeV2(2) -> Array(TimeV2(1)))
-                        new MapLiteral(Lists.newArrayList(new 
DateTimeV2Literal("2020-02-02 00:00:00.12")),
-                                        Lists.newArrayList(new ArrayLiteral(
-                                                        Lists.newArrayList(new 
TimeV2Literal("12:34:56.1"))))),
+                        new MapLiteral(map),
                         // Array(Map(TimeV2(3) -> DateTimeV2(0)))
-                        new ArrayLiteral(Lists.newArrayList(new MapLiteral(
-                                        Lists.newArrayList(new 
TimeV2Literal("12:34:56.123")),
-                                        Lists.newArrayList(new 
DateTimeV2Literal("2020-02-02 00:00:00"))))),
+                        new ArrayLiteral(Lists.newArrayList(new 
MapLiteral(map2))),
                         // DateTimeV2(4)
                         new DateTimeV2Literal("2020-02-02 00:00:00.1234"));
 
diff --git 
a/regression-test/data/datatype_p0/nested_types/base_cases/one_level_nestedtypes_with_s3data.out
 
b/regression-test/data/datatype_p0/nested_types/base_cases/one_level_nestedtypes_with_s3data.out
index d64339834f7..6160a6b49f4 100644
Binary files 
a/regression-test/data/datatype_p0/nested_types/base_cases/one_level_nestedtypes_with_s3data.out
 and 
b/regression-test/data/datatype_p0/nested_types/base_cases/one_level_nestedtypes_with_s3data.out
 differ
diff --git 
a/regression-test/data/datatype_p0/nested_types/query/test_nestedtypes_csv_insert_into_with_s3.out
 
b/regression-test/data/datatype_p0/nested_types/query/test_nestedtypes_csv_insert_into_with_s3.out
index c3aec547150..2fe8294f774 100644
Binary files 
a/regression-test/data/datatype_p0/nested_types/query/test_nestedtypes_csv_insert_into_with_s3.out
 and 
b/regression-test/data/datatype_p0/nested_types/query/test_nestedtypes_csv_insert_into_with_s3.out
 differ
diff --git 
a/regression-test/data/datatype_p0/nested_types/query/test_nestedtypes_json_insert_into_with_s3.out
 
b/regression-test/data/datatype_p0/nested_types/query/test_nestedtypes_json_insert_into_with_s3.out
index 7d303a86b0a..c79563f7e46 100644
Binary files 
a/regression-test/data/datatype_p0/nested_types/query/test_nestedtypes_json_insert_into_with_s3.out
 and 
b/regression-test/data/datatype_p0/nested_types/query/test_nestedtypes_json_insert_into_with_s3.out
 differ
diff --git 
a/regression-test/data/export_p0/outfile/csv/test_outfile_csv_complex_type.out 
b/regression-test/data/export_p0/outfile/csv/test_outfile_csv_complex_type.out
index 33bb4b69c85..948051d3b19 100644
Binary files 
a/regression-test/data/export_p0/outfile/csv/test_outfile_csv_complex_type.out 
and 
b/regression-test/data/export_p0/outfile/csv/test_outfile_csv_complex_type.out 
differ
diff --git 
a/regression-test/data/export_p0/outfile/csv/test_outfile_csv_map_type.out 
b/regression-test/data/export_p0/outfile/csv/test_outfile_csv_map_type.out
index f65ff449d24..28f5e9799b5 100644
Binary files 
a/regression-test/data/export_p0/outfile/csv/test_outfile_csv_map_type.out and 
b/regression-test/data/export_p0/outfile/csv/test_outfile_csv_map_type.out 
differ
diff --git 
a/regression-test/data/export_p0/outfile/csv/test_outfile_csv_one_nested_type.out
 
b/regression-test/data/export_p0/outfile/csv/test_outfile_csv_one_nested_type.out
index 29738e93716..14734d7e35f 100644
Binary files 
a/regression-test/data/export_p0/outfile/csv/test_outfile_csv_one_nested_type.out
 and 
b/regression-test/data/export_p0/outfile/csv/test_outfile_csv_one_nested_type.out
 differ
diff --git 
a/regression-test/data/export_p0/outfile/orc/test_outfile_orc_one_nested_type.out
 
b/regression-test/data/export_p0/outfile/orc/test_outfile_orc_one_nested_type.out
index 830604f4aa2..5d94cbc6fb7 100644
Binary files 
a/regression-test/data/export_p0/outfile/orc/test_outfile_orc_one_nested_type.out
 and 
b/regression-test/data/export_p0/outfile/orc/test_outfile_orc_one_nested_type.out
 differ
diff --git 
a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out
 
b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out
index d4754dc7ead..c8ff8cafdd9 100644
Binary files 
a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out
 and 
b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out
 differ
diff --git 
a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_map_type.out
 
b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_map_type.out
index 7c115ce42b6..86b0395afd0 100644
Binary files 
a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_map_type.out
 and 
b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_map_type.out
 differ
diff --git 
a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_one_nested_type.out
 
b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_one_nested_type.out
index 830604f4aa2..5d94cbc6fb7 100644
Binary files 
a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_one_nested_type.out
 and 
b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_one_nested_type.out
 differ
diff --git a/regression-test/data/export_p0/test_outfile_orc_complex_type.out 
b/regression-test/data/export_p0/test_outfile_orc_complex_type.out
index e8f4bd06e6c..c7fb4a72e70 100644
Binary files a/regression-test/data/export_p0/test_outfile_orc_complex_type.out 
and b/regression-test/data/export_p0/test_outfile_orc_complex_type.out differ
diff --git a/regression-test/data/export_p0/test_outfile_orc_map_type.out 
b/regression-test/data/export_p0/test_outfile_orc_map_type.out
index facccb9a1c1..e6fe430dc97 100644
Binary files a/regression-test/data/export_p0/test_outfile_orc_map_type.out and 
b/regression-test/data/export_p0/test_outfile_orc_map_type.out differ
diff --git 
a/regression-test/data/external_table_p0/export/hive_read/orc/test_hive_read_orc_complex_type.out
 
b/regression-test/data/external_table_p0/export/hive_read/orc/test_hive_read_orc_complex_type.out
index f9743a567c9..7c81d0fca23 100644
Binary files 
a/regression-test/data/external_table_p0/export/hive_read/orc/test_hive_read_orc_complex_type.out
 and 
b/regression-test/data/external_table_p0/export/hive_read/orc/test_hive_read_orc_complex_type.out
 differ
diff --git 
a/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out 
b/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out
index 9dbe2ff79ab..46b9782952a 100644
Binary files 
a/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out and 
b/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out differ
diff --git 
a/regression-test/data/nereids_function_p0/cast_function/test_cast_map_function.out
 
b/regression-test/data/nereids_function_p0/cast_function/test_cast_map_function.out
index 17dc71554bd..0533c6cc61c 100644
Binary files 
a/regression-test/data/nereids_function_p0/cast_function/test_cast_map_function.out
 and 
b/regression-test/data/nereids_function_p0/cast_function/test_cast_map_function.out
 differ
diff --git a/regression-test/data/nereids_function_p0/scalar_function/Map.out 
b/regression-test/data/nereids_function_p0/scalar_function/Map.out
index eee9f8b8eb1..c3eb0f6c071 100644
Binary files a/regression-test/data/nereids_function_p0/scalar_function/Map.out 
and b/regression-test/data/nereids_function_p0/scalar_function/Map.out differ
diff --git a/regression-test/data/nereids_p0/datatype/test_map.out 
b/regression-test/data/nereids_p0/datatype/test_map.out
index 4ac971fb3a1..727b11353d0 100644
Binary files a/regression-test/data/nereids_p0/datatype/test_map.out and 
b/regression-test/data/nereids_p0/datatype/test_map.out differ
diff --git 
a/regression-test/data/query_p0/sql_functions/cast_function/test_cast_map_function.out
 
b/regression-test/data/query_p0/sql_functions/cast_function/test_cast_map_function.out
index 17dc71554bd..0533c6cc61c 100644
Binary files 
a/regression-test/data/query_p0/sql_functions/cast_function/test_cast_map_function.out
 and 
b/regression-test/data/query_p0/sql_functions/cast_function/test_cast_map_function.out
 differ
diff --git 
a/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out
 
b/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out
index 6dd44129806..996c1ff4570 100644
Binary files 
a/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out
 and 
b/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out
 differ
diff --git 
a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy 
b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy
index 7c971c3bcbe..0824874239d 100644
--- 
a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy
+++ 
b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy
@@ -648,7 +648,6 @@ 
suite("test_hdfs_parquet_group6","external,hive,tvf,external_docker") {
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
 
-
             uri = "${defaultFS}" + 
"/user/doris/tvf_data/test_hdfs_parquet/group6/test_parquet_time_type.parquet"
             test {
                 sql """ select * from HDFS(
diff --git a/regression-test/suites/nereids_p0/datatype/test_map.groovy 
b/regression-test/suites/nereids_p0/datatype/test_map.groovy
index f486342c379..5f61729e24c 100644
--- a/regression-test/suites/nereids_p0/datatype/test_map.groovy
+++ b/regression-test/suites/nereids_p0/datatype/test_map.groovy
@@ -35,6 +35,11 @@ suite("test_map") {
     sql 'insert into `test_map_table` values (5, 2, {"key2_2": "value2_2", 
"key22_2": "value22_2"});'
     sql 'insert into `test_map_table` values (6, 3, {"key3": "value3", 
"key33": "value33", "key3333": "value333"});'
     sql 'insert into `test_map_table` values (7, 4, {"key4": "value4", 
"key44": "value44", "key444": "value444", "key4444": "value4444"});'
+    sql 'insert into `test_map_table` values (7, 5, {"key5": "value5", "key44": "value44", null: "null", "key4": "value444", null: "null2", "key44": "value4444"});'
+
+    qt_sql """
+        select id, k1, array_sort(map_keys(value)) as v1, 
array_sort(map_values(value)) as v2 from test_map_table order by 1, 2;
+    """
 
     sql "DROP TABLE IF EXISTS `test_map_table_right`"
     sql """
@@ -52,6 +57,13 @@ suite("test_map") {
     sql 'insert into `test_map_table_right` values(6, 3);'
 
     qt_sql """
-        select * from test_map_table left join test_map_table_right on 
test_map_table.k1 = test_map_table_right.value order by 1,2,4,5;
+        select 
+            test_map_table.id id
+            , test_map_table.k1 k1
+            , array_sort(map_keys(test_map_table.value)) as v1
+            , array_sort(map_values(test_map_table.value)) as v2
+            , test_map_table_right.id r_id
+            , test_map_table_right.value r_value
+        from test_map_table left join test_map_table_right on 
test_map_table.k1 = test_map_table_right.value order by 1,2,5,6;
     """
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to