This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b806bce3611 [feat](map) remove duplicated keys in ColumnMap (#54068)
b806bce3611 is described below
commit b806bce361162235a8c603816da7cb6937f26339
Author: Jerry Hu <[email protected]>
AuthorDate: Thu Sep 11 14:00:36 2025 +0800
[feat](map) remove duplicated keys in ColumnMap (#54068)
---
be/src/vec/columns/column_map.cpp | 102 ++++++++-
be/src/vec/columns/column_map.h | 15 ++
be/src/vec/exec/format/orc/vorc_reader.cpp | 6 +-
.../exec/format/parquet/vparquet_column_reader.cpp | 1 +
be/src/vec/exec/jni_connector.cpp | 5 +-
be/src/vec/functions/cast/cast_to_map.h | 6 +-
be/src/vec/functions/function_map.cpp | 12 +-
be/src/vec/sink/vtablet_block_convertor.cpp | 5 +-
be/test/vec/core/column_map_test.cpp | 250 +++++++++++++++++++++
be/test/vec/exec/orc/orc_reader_fill_data_test.cpp | 19 +-
.../doris/nereids/parser/LogicalPlanBuilder.java | 9 +-
.../expression/rules/FoldConstantRuleOnBE.java | 12 +-
.../trees/expressions/functions/agg/MapAgg.java | 4 +-
.../trees/expressions/functions/agg/MapAggV2.java | 4 +-
.../trees/expressions/literal/MapLiteral.java | 69 +++---
.../functions/ComputeSignatureHelperTest.java | 49 ++--
.../one_level_nestedtypes_with_s3data.out | Bin 3604422 -> 3561305
bytes
.../test_nestedtypes_csv_insert_into_with_s3.out | Bin 436103 -> 412376 bytes
.../test_nestedtypes_json_insert_into_with_s3.out | Bin 733470 -> 705865 bytes
.../outfile/csv/test_outfile_csv_complex_type.out | Bin 8605 -> 8561 bytes
.../outfile/csv/test_outfile_csv_map_type.out | Bin 14057 -> 13169 bytes
.../csv/test_outfile_csv_one_nested_type.out | Bin 9348 -> 9244 bytes
.../orc/test_outfile_orc_one_nested_type.out | Bin 9323 -> 9219 bytes
.../parquet/test_outfile_parquet_complex_type.out | Bin 8574 -> 8522 bytes
.../parquet/test_outfile_parquet_map_type.out | Bin 13740 -> 12974 bytes
.../test_outfile_parquet_one_nested_type.out | Bin 9323 -> 9219 bytes
.../export_p0/test_outfile_orc_complex_type.out | Bin 8549 -> 8505 bytes
.../data/export_p0/test_outfile_orc_map_type.out | Bin 14304 -> 13416 bytes
.../orc/test_hive_read_orc_complex_type.out | Bin 20515 -> 20471 bytes
.../paimon/test_paimon_catalog.out | Bin 791619 -> 788923 bytes
.../cast_function/test_cast_map_function.out | Bin 784 -> 745 bytes
.../nereids_function_p0/scalar_function/Map.out | Bin 28228 -> 28192 bytes
.../data/nereids_p0/datatype/test_map.out | Bin 746 -> 1322 bytes
.../cast_function/test_cast_map_function.out | Bin 784 -> 745 bytes
.../string_functions/test_str_to_map.out | Bin 17405 -> 8142 bytes
.../tvf/test_hdfs_parquet_group6.groovy | 1 -
.../suites/nereids_p0/datatype/test_map.groovy | 14 +-
37 files changed, 494 insertions(+), 89 deletions(-)
diff --git a/be/src/vec/columns/column_map.cpp
b/be/src/vec/columns/column_map.cpp
index 079201fe0a5..416bd5628dd 100644
--- a/be/src/vec/columns/column_map.cpp
+++ b/be/src/vec/columns/column_map.cpp
@@ -20,19 +20,21 @@
#include "vec/columns/column_map.h"
-#include <string.h>
-
#include <algorithm>
#include <boost/iterator/iterator_facade.hpp>
-#include <limits>
-#include <memory>
+#include <cstddef>
#include <vector>
#include "common/status.h"
#include "pdqsort.h"
#include "runtime/primitive_type.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_nullable.h"
#include "vec/common/arena.h"
-#include "vec/common/typeid_cast.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/custom_allocator.h"
+#include "vec/common/hash_table/phmap_fwd_decl.h"
+#include "vec/common/string_ref.h"
#include "vec/common/unaligned.h"
#include "vec/core/sort_block.h"
@@ -505,6 +507,96 @@ MutableColumnPtr ColumnMap::permute(const Permutation&
perm, size_t limit) const
assert_cast<const
ColumnArray&>(*k_arr).get_offsets_ptr());
}
+// Drops duplicated keys from every map row of this column, in place.
+//
+// For each row, when the same key appears more than once only the last
+// entry (key and value) survives; surviving entries keep their relative
+// order. Keys are compared through their serialized form (StringRef).
+// The key/value/offset columns are only rewritten when at least one
+// duplicate was found.
+//
+// `recursive`: when true, a value column that is a ColumnMap (directly or
+// wrapped in ColumnNullable) is deduplicated first.
+//
+// Returns Status::OK(), or the error propagated from the nested call.
+Status ColumnMap::deduplicate_keys(bool recursive) {
+    const auto inner_rows = keys_column->size();
+    const auto rows = offsets_column->size();
+
+    if (recursive) {
+        // Look through a Nullable wrapper to reach a nested map, if any.
+        auto values_column_ = values_column;
+        if (values_column_->is_nullable()) {
+            values_column_ = (assert_cast<ColumnNullable&>(*values_column))
+                                     .get_nested_column_ptr();
+        }
+
+        if (const auto* values_map =
+                    check_and_get_column<ColumnMap>(values_column_.get())) {
+            RETURN_IF_ERROR(
+                    (const_cast<ColumnMap*>(values_map))->deduplicate_keys(recursive));
+        }
+    }
+
+    DorisVector<StringRef> serialized_keys(inner_rows);
+
+    const size_t max_one_row_byte_size = keys_column->get_max_row_byte_size();
+    const size_t total_bytes = max_one_row_byte_size * inner_rows;
+    Arena pool;
+
+    if (total_bytes >= config::pre_serialize_keys_limit_bytes) {
+        // reach mem limit, don't serialize in batch
+        for (size_t i = 0; i != inner_rows; ++i) {
+            // Use a fresh range cursor for every key. Sharing one cursor
+            // across keys asks the arena to keep all keys in one contiguous
+            // range, which it can only honour by re-copying the whole range
+            // whenever a chunk overflows.
+            const char* begin = nullptr;
+            serialized_keys[i] =
+                    keys_column->serialize_value_into_arena(i, pool, begin);
+        }
+    } else {
+        // One fixed-size slot per key inside a single arena allocation.
+        auto* serialized_key_buffer =
+                reinterpret_cast<uint8_t*>(pool.alloc(total_bytes));
+
+        for (size_t i = 0; i < inner_rows; ++i) {
+            serialized_keys[i].data = reinterpret_cast<char*>(
+                    serialized_key_buffer + (i * max_one_row_byte_size));
+            serialized_keys[i].size = 0;
+        }
+
+        keys_column->serialize_vec(serialized_keys.data(), inner_rows);
+    }
+
+    auto new_offsets = COffsets::create();
+    new_offsets->reserve(rows);
+    auto& new_offsets_data = new_offsets->get_data();
+
+    // Every entry is kept by default; only superseded duplicates are cleared.
+    IColumn::Filter filter(inner_rows, 1);
+    auto& offsets = get_offsets();
+
+    Offset64 offset = 0;
+    bool has_duplicated_key = false;
+
+    // Serialized key -> position of its latest occurrence in the current row.
+    // Reused across rows to avoid constructing a table per row.
+    phmap::flat_hash_map<StringRef, size_t> keys_map;
+
+    for (size_t i = 0; i != rows; ++i) {
+        // NOTE(review): for i == 0 this reads offsets[-1], the same pattern
+        // as the row-size accessor in column_map.h; presumably the offsets
+        // array is left-padded with 0 -- confirm.
+        const auto row_begin = offsets[i - 1];
+        const auto row_end = offsets[i];
+        const auto count = row_end - row_begin;
+
+        if (count <= 1) {
+            // Empty or single-entry maps cannot contain duplicates.
+            offset += count;
+            new_offsets_data.push_back(offset);
+            continue;
+        }
+
+        keys_map.clear();
+        keys_map.reserve(count);
+        for (size_t j = row_begin; j < row_end; ++j) {
+            const auto [it, inserted] = keys_map.try_emplace(serialized_keys[j], j);
+            if (inserted) {
+                ++offset;
+            } else {
+                // A later entry supersedes the earlier one with the same key.
+                filter[it->second] = 0;
+                it->second = j;
+                has_duplicated_key = true;
+            }
+        }
+        new_offsets_data.push_back(offset);
+    }
+
+    if (has_duplicated_key) {
+        offsets_column = std::move(new_offsets);
+        keys_column->filter(filter);
+        values_column->filter(filter);
+    }
+
+    return Status::OK();
+}
+
void ColumnMap::shrink_padding_chars() {
keys_column->shrink_padding_chars();
values_column->shrink_padding_chars();
diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h
index 49f343095f8..1db06d80f30 100644
--- a/be/src/vec/columns/column_map.h
+++ b/be/src/vec/columns/column_map.h
@@ -193,6 +193,21 @@ public:
return get_offsets()[i] - get_offsets()[i - 1];
}
+ // Remove entries with duplicated keys from each map in the ColumnMap.
+ //
+ // Within each map, if several entries share the same key (NULL keys are
+ // treated as equal to each other), only the **last** such entry is
+ // retained, regardless of the values; earlier entries with that key are
+ // removed. This ensures that all keys within each map are unique.
+ //
+ // If `recursive` is true, a value column that is itself a map (optionally
+ // wrapped in Nullable) is deduplicated as well.
+ //
+ // Note: This function modifies the internal state of the ColumnMap
+ // in-place. It is intended to be used after data loading or merging steps
+ // where duplicated keys may have been introduced.
+ //
+ // Example:
+ // Input map: {"a": 1, "b": 2, "a": 3, "c": 4, null: 5, null: 6}
+ // Result: {"b": 2, "a": 3, "c": 4, null: 6}
+ Status deduplicate_keys(bool recursive = false);
+
ColumnPtr convert_column_if_overflow() override {
keys_column = keys_column->convert_column_if_overflow();
values_column = values_column->convert_column_if_overflow();
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index e4a693a6a76..b40896bfb24 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -1780,13 +1780,15 @@ Status OrcReader::_fill_doris_data_column(const
std::string& col_name,
ColumnPtr& doris_value_column = doris_map.get_values_ptr();
std::string key_col_name = col_name + ".key";
std::string value_col_name = col_name + ".value";
+
RETURN_IF_ERROR(_orc_column_to_doris_column<false>(
key_col_name, doris_key_column, doris_key_type,
root_node->get_key_node(),
orc_key_type, orc_map->keys.get(), element_size));
- return _orc_column_to_doris_column<false>(
+ RETURN_IF_ERROR(_orc_column_to_doris_column<false>(
value_col_name, doris_value_column, doris_value_type,
root_node->get_value_node(),
- orc_value_type, orc_map->elements.get(), element_size);
+ orc_value_type, orc_map->elements.get(), element_size));
+ return doris_map.deduplicate_keys();
}
case PrimitiveType::TYPE_STRUCT: {
if (orc_column_type->getKind() != orc::TypeKind::STRUCT) {
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index 215f3ec2619..170a29b3e94 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -805,6 +805,7 @@ Status MapColumnReader::read_column_data(
// fill offset and null map
fill_array_offset(_field_schema, map.get_offsets(), null_map_ptr,
_key_reader->get_rep_level(),
_key_reader->get_def_level());
+ RETURN_IF_ERROR(map.deduplicate_keys());
DCHECK_EQ(key_column->size(), map.get_offsets().back());
return Status::OK();
diff --git a/be/src/vec/exec/jni_connector.cpp
b/be/src/vec/exec/jni_connector.cpp
index f6a708b0f96..b222c7368e6 100644
--- a/be/src/vec/exec/jni_connector.cpp
+++ b/be/src/vec/exec/jni_connector.cpp
@@ -453,8 +453,9 @@ Status JniConnector::_fill_map_column(TableMetaAddress&
address, MutableColumnPt
RETURN_IF_ERROR(_fill_column(address, key_column, key_type,
map_offsets[origin_size + num_rows - 1] -
start_offset));
- return _fill_column(address, value_column, value_type,
- map_offsets[origin_size + num_rows - 1] -
start_offset);
+ RETURN_IF_ERROR(_fill_column(address, value_column, value_type,
+ map_offsets[origin_size + num_rows - 1] -
start_offset));
+ return map.deduplicate_keys();
}
Status JniConnector::_fill_struct_column(TableMetaAddress& address,
MutableColumnPtr& doris_column,
diff --git a/be/src/vec/functions/cast/cast_to_map.h
b/be/src/vec/functions/cast/cast_to_map.h
index 61697a675e5..b5adb934a5e 100644
--- a/be/src/vec/functions/cast/cast_to_map.h
+++ b/be/src/vec/functions/cast/cast_to_map.h
@@ -75,8 +75,10 @@ WrapperType create_map_wrapper(FunctionContext* context,
const DataTypePtr& from
converted_columns[i] =
block.get_by_position(element_result).column;
}
- block.get_by_position(result).column = ColumnMap::create(
- converted_columns[0], converted_columns[1],
from_col_map->get_offsets_ptr());
+ auto map_column = ColumnMap::create(converted_columns[0],
converted_columns[1],
+ from_col_map->get_offsets_ptr());
+
static_cast<void>(assert_cast<ColumnMap&>(*map_column).deduplicate_keys());
+ block.get_by_position(result).column = std::move(map_column);
return Status::OK();
};
}
diff --git a/be/src/vec/functions/function_map.cpp
b/be/src/vec/functions/function_map.cpp
index a45fcc406b7..d2927497d82 100644
--- a/be/src/vec/functions/function_map.cpp
+++ b/be/src/vec/functions/function_map.cpp
@@ -126,6 +126,8 @@ public:
offset += num_element / 2;
result_col_map_offsets[row] = offset;
}
+
+ RETURN_IF_ERROR(map_column->deduplicate_keys());
block.replace_by_position(result, std::move(result_col));
return Status::OK();
}
@@ -388,9 +390,13 @@ private:
result_col_map_offsets->insert_value(result_col_map_keys_data->size());
}
- return ColumnMap::create(std::move(result_col_map_keys_data),
- std::move(result_col_map_vals_data),
- std::move(result_col_map_offsets));
+ auto map_column =
ColumnMap::create(std::move(result_col_map_keys_data),
+
std::move(result_col_map_vals_data),
+ std::move(result_col_map_offsets));
+
+ // `deduplicate_keys` always return ok
+ static_cast<void>(map_column->deduplicate_keys());
+ return map_column;
}
static std::vector<std::string_view> split_pair_by_delim(const
std::string_view& str,
diff --git a/be/src/vec/sink/vtablet_block_convertor.cpp
b/be/src/vec/sink/vtablet_block_convertor.cpp
index bc790419c00..a7f64ce51c8 100644
--- a/be/src/vec/sink/vtablet_block_convertor.cpp
+++ b/be/src/vec/sink/vtablet_block_convertor.cpp
@@ -399,7 +399,9 @@ Status OlapTableBlockConvertor::_internal_validate_column(
break;
}
case TYPE_MAP: {
- const auto column_map = assert_cast<const
vectorized::ColumnMap*>(real_column_ptr.get());
+ const auto* column_map = assert_cast<const
vectorized::ColumnMap*>(real_column_ptr.get());
+
RETURN_IF_ERROR((const_cast<ColumnMap*>(column_map))->deduplicate_keys(true));
+
const auto* type_map =
assert_cast<const
vectorized::DataTypeMap*>(remove_nullable(type).get());
auto key_type = type_map->get_key_type();
@@ -411,6 +413,7 @@ Status OlapTableBlockConvertor::_internal_validate_column(
permutation[c] = rows ? (*rows)[r] : r;
}
}
+
fmt::format_to(error_prefix, "MAP type failed: ");
RETURN_IF_ERROR(_validate_column(state, key_type,
column_map->get_keys_ptr(), slot_index,
error_prefix, permutation.size(),
&permutation));
diff --git a/be/test/vec/core/column_map_test.cpp
b/be/test/vec/core/column_map_test.cpp
index 576b991ebe4..37baee03c9e 100644
--- a/be/test/vec/core/column_map_test.cpp
+++ b/be/test/vec/core/column_map_test.cpp
@@ -17,15 +17,21 @@
#include "vec/columns/column_map.h"
+#include <gtest/gtest-death-test.h>
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
+
+#include <cstdint>
#include "gtest/gtest_pred_impl.h"
+#include "runtime/define_primitive_type.h"
#include "runtime/primitive_type.h"
#include "vec/columns/column.h"
#include "vec/columns/column_string.h"
#include "vec/columns/column_vector.h"
#include "vec/core/field.h"
+#include "vec/data_types/data_type_nullable.h"
namespace doris::vectorized {
TEST(ColumnMapTest2, StringKeyTest) {
@@ -97,6 +103,250 @@ TEST(ColumnMapTest2, StringKeyTest) {
}
};
+TEST(ColumnMapTest2, StringKeyTestDuplicatedKeys) { // deduplicate_keys() on a map with nullable string keys and int values: the last entry per key must win
+ auto col_map_str = ColumnMap(
+ ColumnNullable::create(ColumnString::create(),
ColumnVector<TYPE_BOOLEAN>::create()),
+ ColumnInt32::create(), ColumnArray::ColumnOffsets::create());
+ Array k1 = {Field::create_field<TYPE_STRING>("a"),
Field::create_field<TYPE_STRING>("b"),
+ Field::create_field<TYPE_STRING>("c"),
Field::create_field<TYPE_STRING>("a"),
+ Field::create_field<TYPE_STRING>("b"),
Field::create_field<TYPE_STRING>("c")};
+ Array v1 = {Field::create_field<TYPE_INT>(1),
Field::create_field<TYPE_INT>(2),
+ Field::create_field<TYPE_INT>(3),
Field::create_field<TYPE_INT>(4),
+ Field::create_field<TYPE_INT>(5),
Field::create_field<TYPE_INT>(6)};
+ {
+ Map map;
+ map.push_back(Field::create_field<TYPE_ARRAY>(k1));
+ map.push_back(Field::create_field<TYPE_ARRAY>(v1));
+ col_map_str.insert(Field::create_field<TYPE_MAP>(map));
+ }
+ {
+ Map map;
+ map.push_back(Field::create_field<TYPE_ARRAY>(k1));
+ map.push_back(Field::create_field<TYPE_ARRAY>(v1));
+ col_map_str.insert(Field::create_field<TYPE_MAP>(map));
+ }
+
+ Array k2 = {Field::create_field<TYPE_STRING>("aa"),
Field::create_field<TYPE_STRING>("bb"),
+ Field::create_field<TYPE_STRING>("cc"),
Field::create_field<TYPE_STRING>("aa"),
+ Field::create_field<TYPE_STRING>("cc")};
+ Array v2 = {Field::create_field<TYPE_INT>(11),
Field::create_field<TYPE_INT>(22),
+ Field::create_field<TYPE_INT>(33),
Field::create_field<TYPE_INT>(111),
+ Field::create_field<TYPE_INT>(333)};
+ {
+ Map map;
+ map.push_back(Field::create_field<TYPE_ARRAY>(k2));
+ map.push_back(Field::create_field<TYPE_ARRAY>(v2));
+ col_map_str.insert(Field::create_field<TYPE_MAP>(map));
+ }
+
+ Array k3 = {Field::create_field<TYPE_STRING>("aaa"),
+ Field::create_field<TYPE_STRING>("bbb"),
+ Field(),
+ Field::create_field<TYPE_STRING>(""),
+ Field::create_field<TYPE_STRING>("ccc"),
+ Field::create_field<TYPE_STRING>("ccc"),
+ Field::create_field<TYPE_STRING>(""),
+ Field()};
+ Array v3 = {Field::create_field<TYPE_INT>(111),
Field::create_field<TYPE_INT>(222),
+ Field::create_field<TYPE_INT>(4321),
Field::create_field<TYPE_INT>(999),
+ Field::create_field<TYPE_INT>(333),
Field::create_field<TYPE_INT>(3333),
+ Field::create_field<TYPE_INT>(9988),
Field::create_field<TYPE_INT>(1234)};
+ {
+ Map map;
+ map.push_back(Field::create_field<TYPE_ARRAY>(k3));
+ map.push_back(Field::create_field<TYPE_ARRAY>(v3));
+ col_map_str.insert(Field::create_field<TYPE_MAP>(map));
+ }
+
+ ASSERT_EQ(col_map_str.size(), 4);
+ auto& keys = col_map_str.get_keys();
+ auto& values = col_map_str.get_values();
+
+ ASSERT_EQ(keys.size(), 25); // 6 + 6 + 5 + 8 entries before deduplication
+ ASSERT_EQ(keys.size(), values.size());
+
+ auto st = col_map_str.deduplicate_keys();
+ ASSERT_TRUE(st.ok()) << st.to_string();
+
+ ASSERT_EQ(keys.size(), 14); // 3 + 3 + 3 + 5 entries remain
+ ASSERT_EQ(keys.size(), values.size());
+
+ auto& offsets = col_map_str.get_offsets();
+
+ auto& nullable_keys = assert_cast<ColumnNullable&>(keys);
+ auto& string_keys =
assert_cast<ColumnString&>(nullable_keys.get_nested_column());
+ auto& int_values = assert_cast<ColumnInt32&>(values);
+
+ ASSERT_EQ(offsets.size(), 4);
+ ASSERT_EQ(offsets[0], 3);
+ ASSERT_EQ(offsets[1], 6);
+ ASSERT_EQ(offsets[2], 9);
+ ASSERT_EQ(offsets[3], 14);
+
+ ASSERT_EQ(string_keys.get_element(0), "a");
+ ASSERT_EQ(string_keys.get_element(1), "b");
+ ASSERT_EQ(string_keys.get_element(2), "c");
+
+ ASSERT_EQ(string_keys.get_element(3), "a");
+ ASSERT_EQ(string_keys.get_element(4), "b");
+ ASSERT_EQ(string_keys.get_element(5), "c");
+
+ ASSERT_EQ(string_keys.get_element(6), "bb"); // "bb" now precedes "aa": the surviving "aa" is the later occurrence
+ ASSERT_EQ(string_keys.get_element(7), "aa");
+ ASSERT_EQ(string_keys.get_element(8), "cc");
+
+ ASSERT_EQ(string_keys.get_element(9), "aaa");
+ ASSERT_EQ(string_keys.get_element(10), "bbb");
+ ASSERT_EQ(string_keys.get_element(11), "ccc");
+ ASSERT_EQ(string_keys.get_element(12), "");
+ ASSERT_TRUE(nullable_keys.is_null_at(13)); // the two NULL keys collapse into a single entry
+
+ ASSERT_EQ(int_values.get_element(0), 4);
+ ASSERT_EQ(int_values.get_element(1), 5);
+ ASSERT_EQ(int_values.get_element(2), 6);
+
+ ASSERT_EQ(int_values.get_element(3), 4);
+ ASSERT_EQ(int_values.get_element(4), 5);
+ ASSERT_EQ(int_values.get_element(5), 6);
+
+ ASSERT_EQ(int_values.get_element(6), 22);
+ ASSERT_EQ(int_values.get_element(7), 111);
+ ASSERT_EQ(int_values.get_element(8), 333);
+
+ ASSERT_EQ(int_values.get_element(9), 111);
+ ASSERT_EQ(int_values.get_element(10), 222);
+ ASSERT_EQ(int_values.get_element(11), 3333);
+ ASSERT_EQ(int_values.get_element(12), 9988);
+ ASSERT_EQ(int_values.get_element(13), 1234); // value paired with the last NULL key
+};
+
+TEST(ColumnMapTest2, StringKeyTestDuplicatedKeysNestedMap) { // deduplicate_keys(true) on a map whose values are maps: outer and nested duplicates must both be removed
+ auto col_map_str = ColumnMap(ColumnString::create(),
+ ColumnMap::create(ColumnString::create(),
ColumnInt32::create(),
+
ColumnArray::ColumnOffsets::create()),
+ ColumnArray::ColumnOffsets::create());
+
+ Map inner_map;
+ {
+ Array k1 = {Field::create_field<TYPE_STRING>("a"),
Field::create_field<TYPE_STRING>("b"),
+ Field::create_field<TYPE_STRING>("c"),
Field::create_field<TYPE_STRING>("a"),
+ Field::create_field<TYPE_STRING>("b"),
Field::create_field<TYPE_STRING>("c")};
+ Array v1 = {Field::create_field<TYPE_INT>(1),
Field::create_field<TYPE_INT>(2),
+ Field::create_field<TYPE_INT>(3),
Field::create_field<TYPE_INT>(4),
+ Field::create_field<TYPE_INT>(5),
Field::create_field<TYPE_INT>(6)};
+ inner_map.push_back(Field::create_field<TYPE_ARRAY>(k1));
+ inner_map.push_back(Field::create_field<TYPE_ARRAY>(v1));
+ }
+
+ Map inner_map2;
+ {
+ Array k1 = {Field::create_field<TYPE_STRING>("a"),
Field::create_field<TYPE_STRING>("b"),
+ Field::create_field<TYPE_STRING>("c"),
Field::create_field<TYPE_STRING>("a"),
+ Field::create_field<TYPE_STRING>("b"),
Field::create_field<TYPE_STRING>("c")};
+ Array v1 = {Field::create_field<TYPE_INT>(1),
Field::create_field<TYPE_INT>(2),
+ Field::create_field<TYPE_INT>(3),
Field::create_field<TYPE_INT>(4),
+ Field::create_field<TYPE_INT>(5),
Field::create_field<TYPE_INT>(6)};
+ inner_map2.push_back(Field::create_field<TYPE_ARRAY>(k1));
+ inner_map2.push_back(Field::create_field<TYPE_ARRAY>(v1));
+ }
+
+ Array k1 = {Field::create_field<TYPE_STRING>("a"),
Field::create_field<TYPE_STRING>("a")};
+ Array v1 = {Field::create_field<TYPE_MAP>(inner_map),
+ Field::create_field<TYPE_MAP>(inner_map2)};
+ {
+ Map map;
+ map.push_back(Field::create_field<TYPE_ARRAY>(k1));
+ map.push_back(Field::create_field<TYPE_ARRAY>(v1));
+ col_map_str.insert(Field::create_field<TYPE_MAP>(map));
+ }
+
+ Map inner_map3;
+ {
+ Array k2 = {Field::create_field<TYPE_STRING>("aa"),
Field::create_field<TYPE_STRING>("bb"),
+ Field::create_field<TYPE_STRING>("cc"),
Field::create_field<TYPE_STRING>("aa"),
+ Field::create_field<TYPE_STRING>("cc")};
+ Array v2 = {Field::create_field<TYPE_INT>(11),
Field::create_field<TYPE_INT>(22),
+ Field::create_field<TYPE_INT>(33),
Field::create_field<TYPE_INT>(111),
+ Field::create_field<TYPE_INT>(333)};
+ inner_map3.push_back(Field::create_field<TYPE_ARRAY>(k2));
+ inner_map3.push_back(Field::create_field<TYPE_ARRAY>(v2));
+ }
+
+ Map inner_map4;
+ {
+ Array k2 = {Field::create_field<TYPE_STRING>("aa"),
Field::create_field<TYPE_STRING>("cc"),
+ Field::create_field<TYPE_STRING>("cc")};
+ Array v2 = {Field::create_field<TYPE_INT>(11),
Field::create_field<TYPE_INT>(33),
+ Field::create_field<TYPE_INT>(333)};
+ inner_map4.push_back(Field::create_field<TYPE_ARRAY>(k2));
+ inner_map4.push_back(Field::create_field<TYPE_ARRAY>(v2));
+ }
+
+ Array k2 = {Field::create_field<TYPE_STRING>("aa"),
Field::create_field<TYPE_STRING>("aa")};
+ Array v2 = {Field::create_field<TYPE_MAP>(inner_map3),
+ Field::create_field<TYPE_MAP>(inner_map4)};
+ {
+ Map map;
+ map.push_back(Field::create_field<TYPE_ARRAY>(k2));
+ map.push_back(Field::create_field<TYPE_ARRAY>(v2));
+ col_map_str.insert(Field::create_field<TYPE_MAP>(map));
+ }
+
+ ASSERT_EQ(col_map_str.size(), 2);
+ auto& keys = col_map_str.get_keys();
+ auto& values = col_map_str.get_values();
+
+ ASSERT_EQ(keys.size(), 4); // two outer entries per row, both rows use a repeated key
+ ASSERT_EQ(keys.size(), values.size());
+
+ auto st = col_map_str.deduplicate_keys(true); // recursive: nested value maps are deduplicated too
+ ASSERT_TRUE(st.ok()) << st.to_string();
+
+ ASSERT_EQ(keys.size(), 2); // one outer entry left per row
+ ASSERT_EQ(keys.size(), values.size());
+
+ auto& offsets = col_map_str.get_offsets();
+ auto& string_keys = assert_cast<ColumnString&>(keys);
+ auto& map_values = assert_cast<ColumnMap&>(values);
+
+ ASSERT_EQ(offsets.size(), 2);
+ ASSERT_EQ(offsets[0], 1);
+ ASSERT_EQ(offsets[1], 2);
+
+ ASSERT_EQ(string_keys.get_element(0), "a");
+ ASSERT_EQ(string_keys.get_element(1), "aa");
+
+ auto map_value1 = get<Array>(map_values[0]);
+ auto map_value2 = get<Array>(map_values[1]);
+
+ ASSERT_EQ(map_value1.size(), 2);
+ ASSERT_EQ(map_value2.size(), 2);
+
+ // keys
+ auto v1_keys = get<Array>(map_value1[0]);
+ ASSERT_EQ(v1_keys.size(), 3);
+ ASSERT_EQ(get<std::string>(v1_keys[0]), "a");
+ ASSERT_EQ(get<std::string>(v1_keys[1]), "b");
+ ASSERT_EQ(get<std::string>(v1_keys[2]), "c");
+
+ auto v2_keys = get<Array>(map_value2[0]);
+ ASSERT_EQ(v2_keys.size(), 2);
+ ASSERT_EQ(get<std::string>(v2_keys[0]), "aa");
+ ASSERT_EQ(get<std::string>(v2_keys[1]), "cc");
+
+ // values
+ auto v1_values = get<Array>(map_value1[1]);
+ ASSERT_EQ(v1_values.size(), 3);
+ ASSERT_EQ(get<int32_t>(v1_values[0]), 4);
+ ASSERT_EQ(get<int32_t>(v1_values[1]), 5);
+ ASSERT_EQ(get<int32_t>(v1_values[2]), 6);
+
+ auto v2_values = get<Array>(map_value2[1]);
+ ASSERT_EQ(v2_values.size(), 2);
+ ASSERT_EQ(get<int32_t>(v2_values[0]), 11); // row 2 keeps inner_map4, the last value stored under "aa"
+ ASSERT_EQ(get<int32_t>(v2_values[1]), 333);
+};
+
TEST(ColumnMapTest2, StringValueTest) {
auto col_map_str64 = ColumnMap(ColumnInt64::create(),
ColumnString64::create(),
ColumnArray::ColumnOffsets::create());
diff --git a/be/test/vec/exec/orc/orc_reader_fill_data_test.cpp
b/be/test/vec/exec/orc/orc_reader_fill_data_test.cpp
index 2f77cefc889..1de4b2f2d5d 100644
--- a/be/test/vec/exec/orc/orc_reader_fill_data_test.cpp
+++ b/be/test/vec/exec/orc/orc_reader_fill_data_test.cpp
@@ -463,11 +463,20 @@ TEST_F(OrcReaderFillDataTest, ComplexTypeConversionTest) {
{doris_column->get_ptr(), doris_struct_type, "cc"}}};
std::cout << block.dump_data() << "\n";
ASSERT_EQ(block.dump_data(),
- "+-------------------+\n|cc(Map(INT,
FLOAT))|\n+-------------------+\n| "
- " {}|\n| {}|\n| {200:6}|\n|
"
- "{300:9}|\n| {400:12, 400:12}|\n| {500:15,
500:15}|\n|{600:18, "
- "600:18,...|\n|{700:21, 700:21,...|\n|{800:24,
800:24,...|\n|{900:27, "
- "900:27,...|\n+-------------------+\n");
+ "+-------------------+\n"
+ "|cc(Map(INT, FLOAT))|\n"
+ "+-------------------+\n"
+ "| {}|\n"
+ "| {}|\n"
+ "| {200:6}|\n"
+ "| {300:9}|\n"
+ "| {400:12}|\n"
+ "| {500:15}|\n"
+ "| {600:18}|\n"
+ "| {700:21}|\n"
+ "| {800:24}|\n"
+ "| {900:27}|\n"
+ "+-------------------+\n");
}
}
} // namespace vectorized
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index abb468a19cc..a80fcf90ab9 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -1060,12 +1060,14 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Supplier;
import java.util.stream.Collectors;
@@ -3397,7 +3399,12 @@ public class LogicalPlanBuilder extends
DorisParserBaseVisitor<Object> {
values.add(items.get(i));
}
}
- return new MapLiteral(typeCoercionItems(keys),
typeCoercionItems(values));
+ List<Literal> castKeys = typeCoercionItems(keys);
+ List<Literal> castValues = typeCoercionItems(values);
+ Map<Literal, Literal> map = new LinkedHashMap<>();
+ AtomicInteger pos = new AtomicInteger(0);
+ castKeys.forEach(k -> map.put(k,
castValues.get(pos.getAndIncrement())));
+ return new MapLiteral(map);
}
@Override
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
index 20b57bce6c1..b09bca061cf 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
@@ -107,11 +107,13 @@ import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
/**
* Constant evaluation of an expression.
@@ -560,10 +562,14 @@ public class FoldConstantRuleOnBE implements
ExpressionPatternRuleFactory {
}
int offsetCount = resultContent.getChildOffsetCount();
if (offsetCount == 1) {
- MapLiteral mapLiteral = new MapLiteral(allKeys, allValues,
mapType);
+ Map<Literal, Literal> map = new LinkedHashMap<>();
+ AtomicInteger pos = new AtomicInteger(0);
+ allKeys.forEach(k -> map.put(k,
allValues.get(pos.getAndIncrement())));
+ MapLiteral mapLiteral = new MapLiteral(map, mapType);
res.add(mapLiteral);
} else {
for (int i = 0; i < offsetCount; ++i) {
+ Map<Literal, Literal> map = new LinkedHashMap<>();
List<Literal> keyLiteral = new ArrayList<>();
List<Literal> valueLiteral = new ArrayList<>();
int startOffset = (int) ((i == 0) ? 0 :
resultContent.getChildOffset(i - 1));
@@ -572,7 +578,9 @@ public class FoldConstantRuleOnBE implements
ExpressionPatternRuleFactory {
keyLiteral.add(allKeys.get(off));
valueLiteral.add(allValues.get(off));
}
- MapLiteral mapLiteral = new MapLiteral(keyLiteral,
valueLiteral, mapType);
+ AtomicInteger pos = new AtomicInteger(0);
+ keyLiteral.forEach(k -> map.put(k,
valueLiteral.get(pos.getAndIncrement())));
+ MapLiteral mapLiteral = new MapLiteral(map, mapType);
res.add(mapLiteral);
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MapAgg.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MapAgg.java
index 7ade67a78f4..8f8652395f7 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MapAgg.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MapAgg.java
@@ -29,7 +29,7 @@ import
org.apache.doris.nereids.types.coercion.FollowToAnyDataType;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
-import java.util.ArrayList;
+import java.util.LinkedHashMap;
import java.util.List;
/**
@@ -78,6 +78,6 @@ public class MapAgg extends NotNullableAggregateFunction
@Override
public Expression resultForEmptyInput() {
- return new MapLiteral(new ArrayList<>(), new ArrayList<>(),
this.getDataType());
+ return new MapLiteral(new LinkedHashMap<>(), this.getDataType());
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MapAggV2.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MapAggV2.java
index 611ba7726b7..652f21f97ba 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MapAggV2.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MapAggV2.java
@@ -29,7 +29,7 @@ import
org.apache.doris.nereids.types.coercion.FollowToAnyDataType;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
-import java.util.ArrayList;
+import java.util.LinkedHashMap;
import java.util.List;
/**
@@ -78,6 +78,6 @@ public class MapAggV2 extends NotNullableAggregateFunction
@Override
public Expression resultForEmptyInput() {
- return new MapLiteral(new ArrayList<>(), new ArrayList<>(),
this.getDataType());
+ return new MapLiteral(new LinkedHashMap<>(), this.getDataType());
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/MapLiteral.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/MapLiteral.java
index d2dc231db2d..d408cf22803 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/MapLiteral.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/MapLiteral.java
@@ -26,44 +26,41 @@ import org.apache.doris.nereids.types.MapType;
import org.apache.doris.nereids.types.NullType;
import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
import java.util.Objects;
import java.util.stream.Collectors;
/** MapLiteral */
public class MapLiteral extends Literal {
- private final List<Literal> keys;
- private final List<Literal> values;
+ private final Map<Literal, Literal> map;
public MapLiteral() {
super(MapType.SYSTEM_DEFAULT);
- this.keys = ImmutableList.of();
- this.values = ImmutableList.of();
+ this.map = ImmutableMap.of();
}
- public MapLiteral(List<Literal> keys, List<Literal> values) {
- this(keys, values, computeDataType(keys, values));
+ public MapLiteral(Map<Literal, Literal> map) {
+ this(map, computeDataType(map));
}
/**
* create MAP Literal with keys, values and datatype
*/
- public MapLiteral(List<Literal> keys, List<Literal> values, DataType
dataType) {
+ public MapLiteral(Map<Literal, Literal> map, DataType dataType) {
super(dataType);
- this.keys = ImmutableList.copyOf(Objects.requireNonNull(keys, "keys
should not be null"));
- this.values = ImmutableList.copyOf(Objects.requireNonNull(values,
"values should not be null"));
+ this.map = ImmutableMap.copyOf(Objects.requireNonNull(map, "Map should
not be null"));
Preconditions.checkArgument(dataType instanceof MapType,
"dataType should be MapType, but we meet %s", dataType);
- Preconditions.checkArgument(keys.size() == values.size(),
- "key size %s is not equal to value size %s", keys.size(),
values.size());
}
@Override
- public List<List<Literal>> getValue() {
- return ImmutableList.of(keys, values);
+ public Map<Literal, Literal> getValue() {
+ return map;
}
@Override
@@ -73,14 +70,13 @@ public class MapLiteral extends Literal {
} else if (targetType instanceof MapType) {
// we should pass dataType to constructor because arguments maybe empty
return new MapLiteral(
- keys.stream()
- .map(k -> k.uncheckedCastWithFallback(((MapType)
targetType).getKeyType()))
- .map(Literal.class::cast)
- .collect(ImmutableList.toImmutableList()),
- values.stream()
- .map(v -> v.uncheckedCastWithFallback(((MapType)
targetType).getValueType()))
- .map(Literal.class::cast)
- .collect(ImmutableList.toImmutableList()),
+ map.entrySet().stream()
+ .collect(ImmutableMap.toImmutableMap(
+ entry -> (Literal) entry.getKey().uncheckedCastWithFallback(((MapType) targetType)
+ .getKeyType()),
+ entry -> (Literal) entry.getValue()
+ .uncheckedCastWithFallback(((MapType) targetType).getValueType())
+ )),
targetType
);
} else {
@@ -90,10 +86,10 @@ public class MapLiteral extends Literal {
@Override
public LiteralExpr toLegacyLiteral() {
- List<LiteralExpr> keyExprs = keys.stream()
+ List<LiteralExpr> keyExprs = map.keySet().stream()
.map(Literal::toLegacyLiteral)
.collect(Collectors.toList());
- List<LiteralExpr> valueExprs = values.stream()
+ List<LiteralExpr> valueExprs = map.values().stream()
.map(Literal::toLegacyLiteral)
.collect(Collectors.toList());
return new
org.apache.doris.analysis.MapLiteral(getDataType().toCatalogDataType(),
keyExprs, valueExprs);
@@ -103,11 +99,8 @@ public class MapLiteral extends Literal {
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("map(");
- if (!keys.isEmpty()) {
- sb.append(keys.get(0).toString()).append(", ").append(values.get(0).toString());
- }
- for (int i = 1; i < keys.size(); i++) {
- sb.append(",
").append(keys.get(i).toString()).append(",").append(values.get(i).toString());
+ for (Entry<Literal, Literal> entry : map.entrySet()) {
+ sb.append(entry.getKey().toString()).append(", ").append(entry.getValue().toString());
}
sb.append(")");
return sb.toString();
@@ -117,11 +110,8 @@ public class MapLiteral extends Literal {
public String computeToSql() {
StringBuilder sb = new StringBuilder();
sb.append("map(");
- if (!keys.isEmpty()) {
- sb.append(keys.get(0).toSql()).append(", ").append(values.get(0).toSql());
- }
- for (int i = 1; i < keys.size(); i++) {
- sb.append(",
").append(keys.get(i).toSql()).append(",").append(values.get(i).toSql());
+ for (Entry<Literal, Literal> entry : map.entrySet()) {
+ sb.append(entry.getKey().toString()).append(", ").append(entry.getValue().toString());
}
sb.append(")");
return sb.toString();
@@ -132,14 +122,13 @@ public class MapLiteral extends Literal {
return visitor.visitMapLiteral(this, context);
}
- private static DataType computeDataType(List<Literal> keys, List<Literal>
values) {
+ private static DataType computeDataType(Map<Literal, Literal> map) {
DataType keyType = NullType.INSTANCE;
DataType valueType = NullType.INSTANCE;
- if (!keys.isEmpty()) {
- keyType = keys.get(0).dataType;
- }
- if (!values.isEmpty()) {
- valueType = values.get(0).dataType;
+ if (!map.isEmpty()) {
+ Map.Entry<Literal, Literal> firstEntry =
map.entrySet().iterator().next();
+ keyType = firstEntry.getKey().dataType;
+ valueType = firstEntry.getValue().dataType;
}
return MapType.of(keyType, valueType);
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java
index 2dc2447c97d..ce9d83362fa 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ComputeSignatureHelperTest.java
@@ -26,6 +26,7 @@ import
org.apache.doris.nereids.trees.expressions.literal.DateTimeV2Literal;
import org.apache.doris.nereids.trees.expressions.literal.DecimalV3Literal;
import org.apache.doris.nereids.trees.expressions.literal.DoubleLiteral;
import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.literal.MapLiteral;
import org.apache.doris.nereids.trees.expressions.literal.NullLiteral;
import org.apache.doris.nereids.trees.expressions.literal.SmallIntLiteral;
@@ -49,13 +50,16 @@ import org.apache.doris.nereids.types.coercion.AnyDataType;
import org.apache.doris.nereids.types.coercion.FollowToAnyDataType;
import org.apache.doris.nereids.types.coercion.FollowToArgumentType;
+import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.math.BigDecimal;
import java.util.Collections;
import java.util.List;
+import java.util.Map;
import java.util.Optional;
public class ComputeSignatureHelperTest {
@@ -137,8 +141,9 @@ public class ComputeSignatureHelperTest {
void testMapImplementAnyDataTypeWithOutIndex() {
FunctionSignature signature =
FunctionSignature.ret(IntegerType.INSTANCE)
.args(MapType.of(AnyDataType.INSTANCE_WITHOUT_INDEX,
AnyDataType.INSTANCE_WITHOUT_INDEX));
- List<Expression> arguments = Lists.newArrayList(new
MapLiteral(Lists.newArrayList(new IntegerLiteral(0)),
- Lists.newArrayList(new BigIntLiteral(0))));
+ Map<Literal, Literal> map = Maps.newLinkedHashMap();
+ map.put(new IntegerLiteral(0), new BigIntLiteral(0));
+ List<Expression> arguments = Lists.newArrayList(new MapLiteral(map));
signature =
ComputeSignatureHelper.implementAnyDataTypeWithOutIndex(signature, arguments);
Assertions.assertTrue(signature.getArgType(0) instanceof MapType);
Assertions.assertTrue(((MapType) signature.getArgType(0)).getKeyType()
instanceof IntegerType);
@@ -202,8 +207,10 @@ public class ComputeSignatureHelperTest {
FunctionSignature signature =
FunctionSignature.ret(IntegerType.INSTANCE)
.args(MapType.of(new AnyDataType(0), new AnyDataType(1)),
new AnyDataType(0), new AnyDataType(1));
+ Map<Literal, Literal> map = Maps.newLinkedHashMap();
+ map.put(new IntegerLiteral(0), new BigIntLiteral(0));
List<Expression> arguments = Lists.newArrayList(
- new MapLiteral(Lists.newArrayList(new IntegerLiteral(0)),
Lists.newArrayList(new BigIntLiteral(0))),
+ new MapLiteral(map),
new BigIntLiteral(0), new IntegerLiteral(0));
signature =
ComputeSignatureHelper.implementAnyDataTypeWithIndex(signature, arguments);
Assertions.assertTrue(signature.getArgType(0) instanceof MapType);
@@ -236,10 +243,11 @@ public class ComputeSignatureHelperTest {
.args(MapType.of(new AnyDataType(0), new AnyDataType(1)),
new AnyDataType(0), new AnyDataType(1),
MapType.of(new FollowToAnyDataType(0), new
FollowToAnyDataType(1)));
+ Map<Literal, Literal> map = Maps.newLinkedHashMap();
+ map.put(new BigIntLiteral(0), new IntegerLiteral(0));
List<Expression> arguments = Lists.newArrayList(
new NullLiteral(), new NullLiteral(), new NullLiteral(),
- new MapLiteral(Lists.newArrayList(new BigIntLiteral(0)),
- Lists.newArrayList(new IntegerLiteral(0))));
+ new MapLiteral(map));
signature =
ComputeSignatureHelper.implementAnyDataTypeWithIndex(signature, arguments);
Assertions.assertTrue(signature.getArgType(0) instanceof MapType);
Assertions.assertTrue(((MapType) signature.getArgType(0)).getKeyType()
instanceof BigIntType);
@@ -275,9 +283,9 @@ public class ComputeSignatureHelperTest {
new AnyDataType(0), new AnyDataType(1),
MapType.of(new FollowToAnyDataType(0), new
FollowToAnyDataType(1)));
List<Expression> arguments = Lists.newArrayList(
- new MapLiteral(Lists.newArrayList(new IntegerLiteral(0)),
Lists.newArrayList(new BigIntLiteral(0))),
+ new MapLiteral(ImmutableMap.of(new IntegerLiteral(0), new
BigIntLiteral(0))),
new BigIntLiteral(0), new IntegerLiteral(0),
- new MapLiteral(Lists.newArrayList(new IntegerLiteral(0)),
Lists.newArrayList(new BigIntLiteral(0))));
+ new MapLiteral(ImmutableMap.of(new IntegerLiteral(0), new
BigIntLiteral(0))));
signature =
ComputeSignatureHelper.implementAnyDataTypeWithIndex(signature, arguments);
Assertions.assertTrue(signature.getArgType(0) instanceof MapType);
Assertions.assertTrue(((MapType) signature.getArgType(0)).getKeyType()
instanceof BigIntType);
@@ -338,8 +346,8 @@ public class ComputeSignatureHelperTest {
MapType.of(DecimalV3Type.WILDCARD,
DecimalV3Type.WILDCARD),
DecimalV3Type.WILDCARD);
List<Expression> arguments = Lists.newArrayList(
- new MapLiteral(Lists.newArrayList(new DecimalV3Literal(new
BigDecimal("1.1234"))),
- Lists.newArrayList(new DecimalV3Literal(new
BigDecimal("12.12345")))),
+ new MapLiteral(ImmutableMap.of(new DecimalV3Literal(new
BigDecimal("1.1234")),
+ new DecimalV3Literal(new BigDecimal("12.12345")))),
new NullLiteral(),
new DecimalV3Literal(new BigDecimal("123.123")));
signature = ComputeSignatureHelper.computePrecision(new
FakeComputeSignature(), signature, arguments);
@@ -392,8 +400,8 @@ public class ComputeSignatureHelperTest {
MapType.of(DateTimeV2Type.SYSTEM_DEFAULT,
DateTimeV2Type.SYSTEM_DEFAULT),
DateTimeV2Type.SYSTEM_DEFAULT);
List<Expression> arguments = Lists.newArrayList(
- new MapLiteral(Lists.newArrayList(new
DateTimeV2Literal("2020-02-02 00:00:00.123")),
- Lists.newArrayList(new DateTimeV2Literal("2020-02-02 00:00:00.12"))),
+ new MapLiteral(ImmutableMap.of(new
DateTimeV2Literal("2020-02-02 00:00:00.123"),
+ new DateTimeV2Literal("2020-02-02 00:00:00.12"))),
new NullLiteral(),
new DateTimeV2Literal("2020-02-02 00:00:00.1234"));
signature = ComputeSignatureHelper.computePrecision(new
FakeComputeSignature(), signature, arguments);
@@ -446,11 +454,11 @@ public class ComputeSignatureHelperTest {
FunctionSignature signature =
FunctionSignature.ret(ArrayType.of(TimeV2Type.INSTANCE)).args(
ArrayType.of(TimeV2Type.INSTANCE),
MapType.of(IntegerType.INSTANCE, TimeV2Type.INSTANCE),
TimeV2Type.INSTANCE);
+ Map<Literal, Literal> map = Maps.newLinkedHashMap();
+ map.put(new IntegerLiteral(1), new TimeV2Literal("12:34:56.1234"));
List<Expression> arguments = Lists.newArrayList(
new ArrayLiteral(Lists.newArrayList(new
TimeV2Literal("12:34:56.12"))),
- new MapLiteral(Lists.newArrayList(new
IntegerLiteral(1)),
- Lists.newArrayList(new
TimeV2Literal("12:34:56.1234"))),
- new TimeV2Literal("12:34:56.123"));
+ new MapLiteral(map), new
TimeV2Literal("12:34:56.123"));
signature = ComputeSignatureHelper.computePrecision(new
FakeComputeSignature(), signature, arguments);
// Check array argument (precision should be 4 from the map value)
@@ -480,16 +488,17 @@ public class ComputeSignatureHelperTest {
DateTimeV2Type.SYSTEM_DEFAULT)),
DateTimeV2Type.SYSTEM_DEFAULT);
+ Map<Literal, Literal> map = Maps.newLinkedHashMap();
+ map.put(new DateTimeV2Literal("2020-02-02 00:00:00.12"),
+ new ArrayLiteral(Lists.newArrayList(new
TimeV2Literal("12:34:56.1"))));
+ Map<Literal, Literal> map2 = Maps.newLinkedHashMap();
+ map2.put(new TimeV2Literal("12:34:56.123"), new
DateTimeV2Literal("2020-02-02 00:00:00"));
// Create complex arguments with different precisions
List<Expression> arguments = Lists.newArrayList(
// Map(DateTimeV2(2) -> Array(TimeV2(1)))
- new MapLiteral(Lists.newArrayList(new
DateTimeV2Literal("2020-02-02 00:00:00.12")),
- Lists.newArrayList(new ArrayLiteral(
- Lists.newArrayList(new
TimeV2Literal("12:34:56.1"))))),
+ new MapLiteral(map),
// Array(Map(TimeV2(3) -> DateTimeV2(0)))
- new ArrayLiteral(Lists.newArrayList(new MapLiteral(
- Lists.newArrayList(new
TimeV2Literal("12:34:56.123")),
- Lists.newArrayList(new
DateTimeV2Literal("2020-02-02 00:00:00"))))),
+ new ArrayLiteral(Lists.newArrayList(new
MapLiteral(map2))),
// DateTimeV2(4)
new DateTimeV2Literal("2020-02-02 00:00:00.1234"));
diff --git
a/regression-test/data/datatype_p0/nested_types/base_cases/one_level_nestedtypes_with_s3data.out
b/regression-test/data/datatype_p0/nested_types/base_cases/one_level_nestedtypes_with_s3data.out
index d64339834f7..6160a6b49f4 100644
Binary files
a/regression-test/data/datatype_p0/nested_types/base_cases/one_level_nestedtypes_with_s3data.out
and
b/regression-test/data/datatype_p0/nested_types/base_cases/one_level_nestedtypes_with_s3data.out
differ
diff --git
a/regression-test/data/datatype_p0/nested_types/query/test_nestedtypes_csv_insert_into_with_s3.out
b/regression-test/data/datatype_p0/nested_types/query/test_nestedtypes_csv_insert_into_with_s3.out
index c3aec547150..2fe8294f774 100644
Binary files
a/regression-test/data/datatype_p0/nested_types/query/test_nestedtypes_csv_insert_into_with_s3.out
and
b/regression-test/data/datatype_p0/nested_types/query/test_nestedtypes_csv_insert_into_with_s3.out
differ
diff --git
a/regression-test/data/datatype_p0/nested_types/query/test_nestedtypes_json_insert_into_with_s3.out
b/regression-test/data/datatype_p0/nested_types/query/test_nestedtypes_json_insert_into_with_s3.out
index 7d303a86b0a..c79563f7e46 100644
Binary files
a/regression-test/data/datatype_p0/nested_types/query/test_nestedtypes_json_insert_into_with_s3.out
and
b/regression-test/data/datatype_p0/nested_types/query/test_nestedtypes_json_insert_into_with_s3.out
differ
diff --git
a/regression-test/data/export_p0/outfile/csv/test_outfile_csv_complex_type.out
b/regression-test/data/export_p0/outfile/csv/test_outfile_csv_complex_type.out
index 33bb4b69c85..948051d3b19 100644
Binary files
a/regression-test/data/export_p0/outfile/csv/test_outfile_csv_complex_type.out
and
b/regression-test/data/export_p0/outfile/csv/test_outfile_csv_complex_type.out
differ
diff --git
a/regression-test/data/export_p0/outfile/csv/test_outfile_csv_map_type.out
b/regression-test/data/export_p0/outfile/csv/test_outfile_csv_map_type.out
index f65ff449d24..28f5e9799b5 100644
Binary files
a/regression-test/data/export_p0/outfile/csv/test_outfile_csv_map_type.out and
b/regression-test/data/export_p0/outfile/csv/test_outfile_csv_map_type.out
differ
diff --git
a/regression-test/data/export_p0/outfile/csv/test_outfile_csv_one_nested_type.out
b/regression-test/data/export_p0/outfile/csv/test_outfile_csv_one_nested_type.out
index 29738e93716..14734d7e35f 100644
Binary files
a/regression-test/data/export_p0/outfile/csv/test_outfile_csv_one_nested_type.out
and
b/regression-test/data/export_p0/outfile/csv/test_outfile_csv_one_nested_type.out
differ
diff --git
a/regression-test/data/export_p0/outfile/orc/test_outfile_orc_one_nested_type.out
b/regression-test/data/export_p0/outfile/orc/test_outfile_orc_one_nested_type.out
index 830604f4aa2..5d94cbc6fb7 100644
Binary files
a/regression-test/data/export_p0/outfile/orc/test_outfile_orc_one_nested_type.out
and
b/regression-test/data/export_p0/outfile/orc/test_outfile_orc_one_nested_type.out
differ
diff --git
a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out
b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out
index d4754dc7ead..c8ff8cafdd9 100644
Binary files
a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out
and
b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out
differ
diff --git
a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_map_type.out
b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_map_type.out
index 7c115ce42b6..86b0395afd0 100644
Binary files
a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_map_type.out
and
b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_map_type.out
differ
diff --git
a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_one_nested_type.out
b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_one_nested_type.out
index 830604f4aa2..5d94cbc6fb7 100644
Binary files
a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_one_nested_type.out
and
b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_one_nested_type.out
differ
diff --git a/regression-test/data/export_p0/test_outfile_orc_complex_type.out
b/regression-test/data/export_p0/test_outfile_orc_complex_type.out
index e8f4bd06e6c..c7fb4a72e70 100644
Binary files a/regression-test/data/export_p0/test_outfile_orc_complex_type.out
and b/regression-test/data/export_p0/test_outfile_orc_complex_type.out differ
diff --git a/regression-test/data/export_p0/test_outfile_orc_map_type.out
b/regression-test/data/export_p0/test_outfile_orc_map_type.out
index facccb9a1c1..e6fe430dc97 100644
Binary files a/regression-test/data/export_p0/test_outfile_orc_map_type.out and
b/regression-test/data/export_p0/test_outfile_orc_map_type.out differ
diff --git
a/regression-test/data/external_table_p0/export/hive_read/orc/test_hive_read_orc_complex_type.out
b/regression-test/data/external_table_p0/export/hive_read/orc/test_hive_read_orc_complex_type.out
index f9743a567c9..7c81d0fca23 100644
Binary files
a/regression-test/data/external_table_p0/export/hive_read/orc/test_hive_read_orc_complex_type.out
and
b/regression-test/data/external_table_p0/export/hive_read/orc/test_hive_read_orc_complex_type.out
differ
diff --git
a/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out
b/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out
index 9dbe2ff79ab..46b9782952a 100644
Binary files
a/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out and
b/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out differ
diff --git
a/regression-test/data/nereids_function_p0/cast_function/test_cast_map_function.out
b/regression-test/data/nereids_function_p0/cast_function/test_cast_map_function.out
index 17dc71554bd..0533c6cc61c 100644
Binary files
a/regression-test/data/nereids_function_p0/cast_function/test_cast_map_function.out
and
b/regression-test/data/nereids_function_p0/cast_function/test_cast_map_function.out
differ
diff --git a/regression-test/data/nereids_function_p0/scalar_function/Map.out
b/regression-test/data/nereids_function_p0/scalar_function/Map.out
index eee9f8b8eb1..c3eb0f6c071 100644
Binary files a/regression-test/data/nereids_function_p0/scalar_function/Map.out
and b/regression-test/data/nereids_function_p0/scalar_function/Map.out differ
diff --git a/regression-test/data/nereids_p0/datatype/test_map.out
b/regression-test/data/nereids_p0/datatype/test_map.out
index 4ac971fb3a1..727b11353d0 100644
Binary files a/regression-test/data/nereids_p0/datatype/test_map.out and
b/regression-test/data/nereids_p0/datatype/test_map.out differ
diff --git
a/regression-test/data/query_p0/sql_functions/cast_function/test_cast_map_function.out
b/regression-test/data/query_p0/sql_functions/cast_function/test_cast_map_function.out
index 17dc71554bd..0533c6cc61c 100644
Binary files
a/regression-test/data/query_p0/sql_functions/cast_function/test_cast_map_function.out
and
b/regression-test/data/query_p0/sql_functions/cast_function/test_cast_map_function.out
differ
diff --git
a/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out
b/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out
index 6dd44129806..996c1ff4570 100644
Binary files
a/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out
and
b/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out
differ
diff --git
a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy
b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy
index 7c971c3bcbe..0824874239d 100644
---
a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy
+++
b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy
@@ -648,7 +648,6 @@
suite("test_hdfs_parquet_group6","external,hive,tvf,external_docker") {
"hadoop.username" = "${hdfsUserName}",
"format" = "parquet") limit 10; """
-
uri = "${defaultFS}" +
"/user/doris/tvf_data/test_hdfs_parquet/group6/test_parquet_time_type.parquet"
test {
sql """ select * from HDFS(
diff --git a/regression-test/suites/nereids_p0/datatype/test_map.groovy
b/regression-test/suites/nereids_p0/datatype/test_map.groovy
index f486342c379..5f61729e24c 100644
--- a/regression-test/suites/nereids_p0/datatype/test_map.groovy
+++ b/regression-test/suites/nereids_p0/datatype/test_map.groovy
@@ -35,6 +35,11 @@ suite("test_map") {
sql 'insert into `test_map_table` values (5, 2, {"key2_2": "value2_2", "key22_2": "value22_2"});'
sql 'insert into `test_map_table` values (6, 3, {"key3": "value3", "key33": "value33", "key3333": "value333"});'
sql 'insert into `test_map_table` values (7, 4, {"key4": "value4", "key44": "value44", "key444": "value444", "key4444": "value4444"});'
+ sql 'insert into `test_map_table` values (7, 5, {"key5": "value5", "key44": "value44", null: "null", "key4": "value444", null: "null2", "key44": "value4444"});'
+
+ qt_sql """
+ select id, k1, array_sort(map_keys(value)) as v1, array_sort(map_values(value)) as v2 from test_map_table order by 1, 2;
+ """
sql "DROP TABLE IF EXISTS `test_map_table_right`"
sql """
@@ -52,6 +57,13 @@ suite("test_map") {
sql 'insert into `test_map_table_right` values(6, 3);'
qt_sql """
- select * from test_map_table left join test_map_table_right on test_map_table.k1 = test_map_table_right.value order by 1,2,4,5;
+ select
+ test_map_table.id id
+ , test_map_table.k1 k1
+ , array_sort(map_keys(test_map_table.value)) as v1
+ , array_sort(map_values(test_map_table.value)) as v2
+ , test_map_table_right.id r_id
+ , test_map_table_right.value r_value
+ from test_map_table left join test_map_table_right on test_map_table.k1 = test_map_table_right.value order by 1,2,5,6;
"""
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]