This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 45dfbeb8b3 [feature](agg) add the aggregation function 'array_agg' 
#23474
45dfbeb8b3 is described below

commit 45dfbeb8b313d1a7a290570e31af2ad7b2d2a39d
Author: xy <[email protected]>
AuthorDate: Thu Sep 7 10:21:55 2023 +0800

    [feature](agg) add the aggregation function 'array_agg' #23474
    
    This function requires one arguments just as ARRAY_AGG(col) and col means 
the column whose values you want to aggregate.
    This function Aggregates the values including NULL in a column into an 
array and returns a value of the ARRAY data type.
---
 .../aggregate_function_collect.cpp                 |  58 ++--
 .../aggregate_function_collect.h                   | 317 ++++++++++++++++++++-
 .../sql-functions/aggregate-functions/array-agg.md |  84 ++++++
 .../sql-functions/aggregate-functions/array-agg.md |  84 ++++++
 .../apache/doris/analysis/FunctionCallExpr.java    |   3 +-
 .../apache/doris/catalog/AggregateFunction.java    |   3 +-
 .../java/org/apache/doris/catalog/FunctionSet.java |   5 +
 .../data/query_p0/aggregate/array_agg.out          |  67 +++++
 .../suites/query_p0/aggregate/array_agg.groovy     | 195 +++++++++++++
 9 files changed, 778 insertions(+), 38 deletions(-)

diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp 
b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
index d6aab14411..2831f39aa3 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
@@ -27,21 +27,29 @@
 
 namespace doris::vectorized {
 
-template <typename T, typename HasLimit>
+template <typename T, typename HasLimit, typename ShowNull>
 AggregateFunctionPtr do_create_agg_function_collect(bool distinct, const 
DataTypes& argument_types,
                                                     const bool 
result_is_nullable) {
+    if (argument_types[0]->is_nullable()) {
+        if constexpr (ShowNull::value) {
+            return 
creator_without_type::create_ignore_nullable<AggregateFunctionCollect<
+                    AggregateFunctionArrayAggData<T>, std::false_type, 
std::true_type>>(
+                    argument_types, result_is_nullable);
+        }
+    }
+
     if (distinct) {
-        return creator_without_type::create<
-                AggregateFunctionCollect<AggregateFunctionCollectSetData<T, 
HasLimit>, HasLimit>>(
+        return creator_without_type::create<AggregateFunctionCollect<
+                AggregateFunctionCollectSetData<T, HasLimit>, HasLimit, 
std::false_type>>(
                 argument_types, result_is_nullable);
     } else {
-        return creator_without_type::create<
-                AggregateFunctionCollect<AggregateFunctionCollectListData<T, 
HasLimit>, HasLimit>>(
+        return creator_without_type::create<AggregateFunctionCollect<
+                AggregateFunctionCollectListData<T, HasLimit>, HasLimit, 
std::false_type>>(
                 argument_types, result_is_nullable);
     }
 }
 
-template <typename HasLimit>
+template <typename HasLimit, typename ShowNull>
 AggregateFunctionPtr create_aggregate_function_collect_impl(const std::string& 
name,
                                                             const DataTypes& 
argument_types,
                                                             const bool 
result_is_nullable) {
@@ -51,25 +59,25 @@ AggregateFunctionPtr 
create_aggregate_function_collect_impl(const std::string& n
     }
 
     WhichDataType which(remove_nullable(argument_types[0]));
-#define DISPATCH(TYPE)                                                         
         \
-    if (which.idx == TypeIndex::TYPE)                                          
         \
-        return do_create_agg_function_collect<TYPE, HasLimit>(distinct, 
argument_types, \
-                                                              
result_is_nullable);
+#define DISPATCH(TYPE)                                                         
                   \
+    if (which.idx == TypeIndex::TYPE)                                          
                   \
+        return do_create_agg_function_collect<TYPE, HasLimit, 
ShowNull>(distinct, argument_types, \
+                                                                        
result_is_nullable);
     FOR_NUMERIC_TYPES(DISPATCH)
     FOR_DECIMAL_TYPES(DISPATCH)
 #undef DISPATCH
     if (which.is_date_or_datetime()) {
-        return do_create_agg_function_collect<Int64, HasLimit>(distinct, 
argument_types,
-                                                               
result_is_nullable);
+        return do_create_agg_function_collect<Int64, HasLimit, 
ShowNull>(distinct, argument_types,
+                                                                         
result_is_nullable);
     } else if (which.is_date_v2()) {
-        return do_create_agg_function_collect<UInt32, HasLimit>(distinct, 
argument_types,
-                                                                
result_is_nullable);
+        return do_create_agg_function_collect<UInt32, HasLimit, 
ShowNull>(distinct, argument_types,
+                                                                          
result_is_nullable);
     } else if (which.is_date_time_v2()) {
-        return do_create_agg_function_collect<UInt64, HasLimit>(distinct, 
argument_types,
-                                                                
result_is_nullable);
+        return do_create_agg_function_collect<UInt64, HasLimit, 
ShowNull>(distinct, argument_types,
+                                                                          
result_is_nullable);
     } else if (which.is_string()) {
-        return do_create_agg_function_collect<StringRef, HasLimit>(distinct, 
argument_types,
-                                                                   
result_is_nullable);
+        return do_create_agg_function_collect<StringRef, HasLimit, ShowNull>(
+                distinct, argument_types, result_is_nullable);
     }
 
     LOG(WARNING) << fmt::format("unsupported input type {} for aggregate 
function {}",
@@ -81,12 +89,17 @@ AggregateFunctionPtr 
create_aggregate_function_collect(const std::string& name,
                                                        const DataTypes& 
argument_types,
                                                        const bool 
result_is_nullable) {
     if (argument_types.size() == 1) {
-        return create_aggregate_function_collect_impl<std::false_type>(name, 
argument_types,
-                                                                       
result_is_nullable);
+        if (name == "array_agg") {
+            return create_aggregate_function_collect_impl<std::false_type, 
std::true_type>(
+                    name, argument_types, result_is_nullable);
+        } else {
+            return create_aggregate_function_collect_impl<std::false_type, 
std::false_type>(
+                    name, argument_types, result_is_nullable);
+        }
     }
     if (argument_types.size() == 2) {
-        return create_aggregate_function_collect_impl<std::true_type>(name, 
argument_types,
-                                                                      
result_is_nullable);
+        return create_aggregate_function_collect_impl<std::true_type, 
std::false_type>(
+                name, argument_types, result_is_nullable);
     }
     LOG(WARNING) << fmt::format("number of parameters for aggregate function 
{}, should be 1 or 2",
                                 name);
@@ -96,6 +109,7 @@ AggregateFunctionPtr create_aggregate_function_collect(const 
std::string& name,
 void register_aggregate_function_collect_list(AggregateFunctionSimpleFactory& 
factory) {
     factory.register_function_both("collect_list", 
create_aggregate_function_collect);
     factory.register_function_both("collect_set", 
create_aggregate_function_collect);
+    factory.register_function_both("array_agg", 
create_aggregate_function_collect);
     factory.register_alias("collect_list", "group_array");
     factory.register_alias("collect_set", "group_uniq_array");
 }
diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.h 
b/be/src/vec/aggregate_functions/aggregate_function_collect.h
index 4e774b887e..78aa01ac68 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_collect.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_collect.h
@@ -288,30 +288,173 @@ struct AggregateFunctionCollectListData<StringRef, 
HasLimit> {
     }
 };
 
-template <typename Data, typename HasLimit>
+template <typename T>
+struct AggregateFunctionArrayAggData {
+    using ElementType = T;
+    using ColVecType = ColumnVectorOrDecimal<ElementType>;
+    MutableColumnPtr column_data;
+    ColVecType* nested_column;
+    NullMap* null_map;
+
+    AggregateFunctionArrayAggData(const DataTypes& argument_types) {
+        if constexpr (IsDecimalNumber<T>) {
+            DataTypePtr column_type = make_nullable(argument_types[0]);
+            column_data = column_type->create_column();
+            null_map = 
&(assert_cast<ColumnNullable&>(*column_data).get_null_map_data());
+            nested_column = assert_cast<ColVecType*>(
+                    
assert_cast<ColumnNullable&>(*column_data).get_nested_column_ptr().get());
+        }
+    }
+
+    AggregateFunctionArrayAggData() {
+        if constexpr (!IsDecimalNumber<T>) {
+            column_data = ColumnNullable::create(ColVecType::create(), 
ColumnUInt8::create());
+            null_map = 
&(assert_cast<ColumnNullable&>(*column_data).get_null_map_data());
+            nested_column = assert_cast<ColVecType*>(
+                    
assert_cast<ColumnNullable&>(*column_data).get_nested_column_ptr().get());
+        }
+    }
+
+    void add(const IColumn& column, size_t row_num) {
+        const auto& col = assert_cast<const ColumnNullable&>(column);
+        const auto& vec = assert_cast<const 
ColVecType&>(col.get_nested_column()).get_data();
+        null_map->push_back(col.get_null_map_data()[row_num]);
+        nested_column->get_data().push_back(vec[row_num]);
+        DCHECK(null_map->size() == nested_column->size());
+    }
+
+    void deserialize_and_merge(const IColumn& column, size_t row_num) {
+        auto& to_arr = assert_cast<const ColumnArray&>(column);
+        auto& to_nested_col = to_arr.get_data();
+        auto col_null = reinterpret_cast<const 
ColumnNullable*>(&to_nested_col);
+        const auto& vec = assert_cast<const 
ColVecType&>(col_null->get_nested_column()).get_data();
+        auto start = to_arr.get_offsets()[row_num - 1];
+        auto end = start + to_arr.get_offsets()[row_num] - 
to_arr.get_offsets()[row_num - 1];
+        for (auto i = start; i < end; ++i) {
+            null_map->push_back(col_null->get_null_map_data()[i]);
+            nested_column->get_data().push_back(vec[i]);
+        }
+    }
+
+    void reset() {
+        null_map->clear();
+        nested_column->clear();
+    }
+
+    void insert_result_into(IColumn& to) const {
+        auto& to_arr = assert_cast<ColumnArray&>(to);
+        auto& to_nested_col = to_arr.get_data();
+        auto col_null = reinterpret_cast<ColumnNullable*>(&to_nested_col);
+        auto& vec = 
assert_cast<ColVecType&>(col_null->get_nested_column()).get_data();
+        size_t num_rows = null_map->size();
+        auto& nested_column_data = nested_column->get_data();
+        for (size_t i = 0; i < num_rows; ++i) {
+            col_null->get_null_map_data().push_back((*null_map)[i]);
+            vec.push_back(nested_column_data[i]);
+        }
+        to_arr.get_offsets().push_back(to_nested_col.size());
+    }
+};
+
+template <>
+struct AggregateFunctionArrayAggData<StringRef> {
+    using ElementType = StringRef;
+    using ColVecType = ColumnString;
+    MutableColumnPtr column_data;
+    ColVecType* nested_column;
+    NullMap* null_map;
+
+    AggregateFunctionArrayAggData() {
+        column_data = ColumnNullable::create(ColVecType::create(), 
ColumnUInt8::create());
+        null_map = 
&(assert_cast<ColumnNullable&>(*column_data).get_null_map_data());
+        nested_column = assert_cast<ColVecType*>(
+                
assert_cast<ColumnNullable&>(*column_data).get_nested_column_ptr().get());
+    }
+
+    void add(const IColumn& column, size_t row_num) {
+        const auto& col = assert_cast<const ColumnNullable&>(column);
+        const auto& vec = assert_cast<const 
ColVecType&>(col.get_nested_column());
+        null_map->push_back(col.get_null_map_data()[row_num]);
+        nested_column->insert_from(vec, row_num);
+        DCHECK(null_map->size() == nested_column->size());
+    }
+
+    void deserialize_and_merge(const IColumn& column, size_t row_num) {
+        auto& to_arr = assert_cast<const ColumnArray&>(column);
+        auto& to_nested_col = to_arr.get_data();
+        auto col_null = reinterpret_cast<const 
ColumnNullable*>(&to_nested_col);
+        const auto& vec = assert_cast<const 
ColVecType&>(col_null->get_nested_column());
+        auto start = to_arr.get_offsets()[row_num - 1];
+        auto end = start + to_arr.get_offsets()[row_num] - 
to_arr.get_offsets()[row_num - 1];
+        for (auto i = start; i < end; ++i) {
+            null_map->push_back(col_null->get_null_map_data()[i]);
+            nested_column->insert_from(vec, i);
+        }
+    }
+
+    void reset() {
+        null_map->clear();
+        nested_column->clear();
+    }
+
+    void insert_result_into(IColumn& to) const {
+        auto& to_arr = assert_cast<ColumnArray&>(to);
+        auto& to_nested_col = to_arr.get_data();
+        auto col_null = reinterpret_cast<ColumnNullable*>(&to_nested_col);
+        auto& vec = assert_cast<ColVecType&>(col_null->get_nested_column());
+        size_t num_rows = null_map->size();
+        for (size_t i = 0; i < num_rows; ++i) {
+            col_null->get_null_map_data().push_back((*null_map)[i]);
+            vec.insert_from(*nested_column, i);
+        }
+        to_arr.get_offsets().push_back(to_nested_col.size());
+    }
+};
+
+//ShowNull is just used to support array_agg because array_agg needs to 
display NULL
+//todo: Supports order by sorting for array_agg
+template <typename Data, typename HasLimit, typename ShowNull>
 class AggregateFunctionCollect
-        : public IAggregateFunctionDataHelper<Data, 
AggregateFunctionCollect<Data, HasLimit>> {
+        : public IAggregateFunctionDataHelper<Data,
+                                              AggregateFunctionCollect<Data, 
HasLimit, ShowNull>> {
     using GenericType = AggregateFunctionCollectSetData<StringRef, HasLimit>;
 
     static constexpr bool ENABLE_ARENA = std::is_same_v<Data, GenericType>;
 
 public:
+    using BaseHelper = IAggregateFunctionHelper<AggregateFunctionCollect<Data, 
HasLimit, ShowNull>>;
+
     AggregateFunctionCollect(const DataTypes& argument_types,
                              UInt64 max_size_ = 
std::numeric_limits<UInt64>::max())
-            : IAggregateFunctionDataHelper<Data, 
AggregateFunctionCollect<Data, HasLimit>>(
+            : IAggregateFunctionDataHelper<Data,
+                                           AggregateFunctionCollect<Data, 
HasLimit, ShowNull>>(
                       {argument_types}),
               return_type(argument_types[0]) {}
 
     std::string get_name() const override {
-        if constexpr (std::is_same_v<AggregateFunctionCollectListData<typename 
Data::ElementType,
-                                                                      
HasLimit>,
-                                     Data>) {
+        if constexpr (ShowNull::value) {
+            return "array_agg";
+        } else if constexpr (std::is_same_v<AggregateFunctionCollectListData<
+                                                    typename 
Data::ElementType, HasLimit>,
+                                            Data>) {
             return "collect_list";
         } else {
             return "collect_set";
         }
     }
 
+    void create(AggregateDataPtr __restrict place) const override {
+        if constexpr (ShowNull::value) {
+            if constexpr (IsDecimalNumber<typename Data::ElementType>) {
+                new (place) Data(argument_types);
+            } else {
+                new (place) Data();
+            }
+        } else {
+            new (place) Data();
+        }
+    }
+
     DataTypePtr get_return_type() const override {
         return std::make_shared<DataTypeArray>(make_nullable(return_type));
     }
@@ -343,35 +486,181 @@ public:
         auto& rhs_data = this->data(rhs);
         if constexpr (ENABLE_ARENA) {
             data.merge(rhs_data, arena);
-        } else {
+        } else if constexpr (!ShowNull::value) {
             data.merge(rhs_data);
         }
     }
 
     void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& 
buf) const override {
-        this->data(place).write(buf);
+        if constexpr (!ShowNull::value) {
+            this->data(place).write(buf);
+        }
     }
 
     void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf,
                      Arena*) const override {
-        this->data(place).read(buf);
+        if constexpr (!ShowNull::value) {
+            this->data(place).read(buf);
+        }
     }
 
     void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& 
to) const override {
         auto& to_arr = assert_cast<ColumnArray&>(to);
         auto& to_nested_col = to_arr.get_data();
-        if (to_nested_col.is_nullable()) {
+        if constexpr (ShowNull::value) {
+            DCHECK(to_nested_col.is_nullable());
+            this->data(place).insert_result_into(to);
+        } else {
+            if (to_nested_col.is_nullable()) {
+                auto col_null = 
reinterpret_cast<ColumnNullable*>(&to_nested_col);
+                
this->data(place).insert_result_into(col_null->get_nested_column());
+                
col_null->get_null_map_data().resize_fill(col_null->get_nested_column().size(), 
0);
+            } else {
+                this->data(place).insert_result_into(to_nested_col);
+            }
+            to_arr.get_offsets().push_back(to_nested_col.size());
+        }
+    }
+
+    void serialize_without_key_to_column(ConstAggregateDataPtr __restrict 
place,
+                                         IColumn& to) const override {
+        if constexpr (ShowNull::value) {
+            this->data(place).insert_result_into(to);
+        } else {
+            return BaseHelper::serialize_without_key_to_column(place, to);
+        }
+    }
+
+    void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, 
const IColumn& column,
+                                           Arena* arena) const override {
+        if constexpr (ShowNull::value) {
+            const size_t num_rows = column.size();
+            for (size_t i = 0; i != num_rows; ++i) {
+                this->data(place).deserialize_and_merge(column, i);
+            }
+        } else {
+            return BaseHelper::deserialize_and_merge_from_column(place, 
column, arena);
+        }
+    }
+
+    void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t 
offset,
+                                   AggregateDataPtr rhs, const ColumnString* 
column, Arena* arena,
+                                   const size_t num_rows) const override {
+        if constexpr (ShowNull::value) {
+            for (size_t i = 0; i != num_rows; ++i) {
+                this->data(places[i]).deserialize_and_merge(*assert_cast<const 
IColumn*>(column),
+                                                            i);
+            }
+        } else {
+            return BaseHelper::deserialize_and_merge_vec(places, offset, rhs, 
column, arena,
+                                                         num_rows);
+        }
+    }
+
+    void deserialize_from_column(AggregateDataPtr places, const IColumn& 
column, Arena* arena,
+                                 size_t num_rows) const override {
+        if constexpr (ShowNull::value) {
+            for (size_t i = 0; i != num_rows; ++i) {
+                this->data(places).deserialize_and_merge(column, i);
+            }
+        } else {
+            return BaseHelper::deserialize_from_column(places, column, arena, 
num_rows);
+        }
+    }
+
+    void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict 
place,
+                                                 const IColumn& column, size_t 
begin, size_t end,
+                                                 Arena* arena) const override {
+        if constexpr (ShowNull::value) {
+            DCHECK(end <= column.size() && begin <= end) << ", begin:" << 
begin << ", end:" << end
+                                                         << ", column.size():" 
<< column.size();
+            for (size_t i = begin; i <= end; ++i) {
+                this->data(place).deserialize_and_merge(column, i);
+            }
+        } else {
+            return BaseHelper::deserialize_and_merge_from_column_range(place, 
column, begin, end,
+                                                                       arena);
+        }
+    }
+
+    void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, 
size_t offset,
+                                            AggregateDataPtr rhs, const 
ColumnString* column,
+                                            Arena* arena, const size_t 
num_rows) const override {
+        if constexpr (ShowNull::value) {
+            for (size_t i = 0; i != num_rows; ++i) {
+                if (places[i]) {
+                    this->data(places[i]).deserialize_and_merge(
+                            *assert_cast<const IColumn*>(column), i);
+                }
+            }
+        } else {
+            return BaseHelper::deserialize_and_merge_vec_selected(places, 
offset, rhs, column,
+                                                                  arena, 
num_rows);
+        }
+    }
+
+    void serialize_to_column(const std::vector<AggregateDataPtr>& places, 
size_t offset,
+                             MutableColumnPtr& dst, const size_t num_rows) 
const override {
+        if constexpr (ShowNull::value) {
+            for (size_t i = 0; i != num_rows; ++i) {
+                Data& data_ = this->data(places[i] + offset);
+                data_.insert_result_into(*dst);
+            }
+        } else {
+            return BaseHelper::serialize_to_column(places, offset, dst, 
num_rows);
+        }
+    }
+
+    void streaming_agg_serialize_to_column(const IColumn** columns, 
MutableColumnPtr& dst,
+                                           const size_t num_rows, Arena* 
arena) const override {
+        if constexpr (ShowNull::value) {
+            auto& to_arr = assert_cast<ColumnArray&>(*dst);
+            auto& to_nested_col = to_arr.get_data();
+            DCHECK(num_rows == columns[0]->size());
             auto col_null = reinterpret_cast<ColumnNullable*>(&to_nested_col);
-            
this->data(place).insert_result_into(col_null->get_nested_column());
-            
col_null->get_null_map_data().resize_fill(col_null->get_nested_column().size(), 
0);
+            const auto& col_src = assert_cast<const 
ColumnNullable&>(*(columns[0]));
+
+            for (size_t i = 0; i < num_rows; ++i) {
+                
col_null->get_null_map_data().push_back(col_src.get_null_map_data()[i]);
+                if constexpr (std::is_same_v<StringRef, typename 
Data::ElementType>) {
+                    auto& vec = 
assert_cast<ColumnString&>(col_null->get_nested_column());
+                    const auto& vec_src =
+                            assert_cast<const 
ColumnString&>(col_src.get_nested_column());
+                    vec.insert_from(vec_src, i);
+                } else {
+                    using ColVecType = ColumnVectorOrDecimal<typename 
Data::ElementType>;
+                    auto& vec = 
assert_cast<ColVecType&>(col_null->get_nested_column()).get_data();
+                    auto& vec_src =
+                            assert_cast<const 
ColVecType&>(col_src.get_nested_column()).get_data();
+                    vec.push_back(vec_src[i]);
+                }
+                to_arr.get_offsets().push_back(to_nested_col.size());
+            }
+
         } else {
-            this->data(place).insert_result_into(to_nested_col);
+            return BaseHelper::streaming_agg_serialize_to_column(columns, dst, 
num_rows, arena);
+        }
+    }
+
+    [[nodiscard]] MutableColumnPtr create_serialize_column() const override {
+        if constexpr (ShowNull::value) {
+            return get_return_type()->create_column();
+        } else {
+            return ColumnString::create();
+        }
+    }
+
+    [[nodiscard]] DataTypePtr get_serialized_type() const override {
+        if constexpr (ShowNull::value) {
+            return std::make_shared<DataTypeArray>(make_nullable(return_type));
+        } else {
+            return IAggregateFunction::get_serialized_type();
         }
-        to_arr.get_offsets().push_back(to_nested_col.size());
     }
 
 private:
     DataTypePtr return_type;
+    using IAggregateFunction::argument_types;
 };
 
 } // namespace doris::vectorized
\ No newline at end of file
diff --git 
a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/array-agg.md 
b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/array-agg.md
new file mode 100644
index 0000000000..de61da2862
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/array-agg.md
@@ -0,0 +1,84 @@
+---
+{
+    "title": "ARRAY_AGG",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## ARRAY_AGG
+
+### description
+
+#### Syntax
+
+`ARRAY_AGG(col)`
+
+Concatenation of values in a column (including the null value) into an array 
can be used for multiple rows to one row (row to column).
+
+### notice
+
+- The order of the elements in an array is not guaranteed.
+- Returns the array generated by the transformation. The element type in the 
array is the same as the col type.
+
+### example
+
+```sql
+mysql> select * from test_doris_array_agg;
+
++------+------+
+
+| c1   | c2   |
+
++------+------+
+
+|    1 | a    |
+
+|    1 | b    |
+
+|    2 | c    |
+
+|    2 | NULL |
+
+|    3 | NULL |
+
++------+------+
+
+mysql> select c1, array_agg(c2) from test_doris_array_agg group by c1;
+
++------+-----------------+
+
+| c1   | array_agg(`c2`) |
+
++------+-----------------+
+
+|    1 | ["a","b"]       |
+
+|    2 | [NULL,"c"]      |
+
+|    3 | [NULL]          |
+
++------+-----------------+
+```
+
+### keywords
+
+ARRAY_AGG
diff --git 
a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/array-agg.md 
b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/array-agg.md
new file mode 100644
index 0000000000..34549ab059
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/array-agg.md
@@ -0,0 +1,84 @@
+---
+{
+    "title": "ARRAY_AGG",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## ARRAY_AGG
+
+### description
+
+#### Syntax
+
+`ARRAY_AGG(col)`
+
+将一列中的值(包括空值 null)串联成一个数组,可以用于多行转一行(行转列)。
+
+### notice
+
+- 数组中元素不保证顺序。
+- 返回转换生成的数组。数组中的元素类型与 `col` 类型一致。
+
+### example
+
+```sql
+mysql> select * from test_doris_array_agg;
+
++------+------+
+
+| c1   | c2   |
+
++------+------+
+
+|    1 | a    |
+
+|    1 | b    |
+
+|    2 | c    |
+
+|    2 | NULL |
+
+|    3 | NULL |
+
++------+------+
+
+mysql> select c1, array_agg(c2) from test_doris_array_agg group by c1;
+
++------+-----------------+
+
+| c1   | array_agg(`c2`) |
+
++------+-----------------+
+
+|    1 | ["a","b"]       |
+
+|    2 | [NULL,"c"]      |
+
+|    3 | [NULL]          |
+
++------+-----------------+
+```
+
+### keywords
+
+ARRAY_AGG
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index 53e3458c3e..855f2036d8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -1566,7 +1566,8 @@ public class FunctionCallExpr extends Expr {
         }
 
         if (fnName.getFunction().equalsIgnoreCase("collect_list")
-                || fnName.getFunction().equalsIgnoreCase("collect_set")) {
+                || fnName.getFunction().equalsIgnoreCase("collect_set")
+                || fnName.getFunction().equalsIgnoreCase("array_agg")) {
             fn.setReturnType(new ArrayType(getChild(0).type));
         }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
index b646c7ef98..2dceb302b7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
@@ -54,7 +54,8 @@ public class AggregateFunction extends Function {
             FunctionSet.INTERSECT_COUNT, 
FunctionSet.ORTHOGONAL_BITMAP_UNION_COUNT,
             FunctionSet.COUNT, "approx_count_distinct", "ndv", 
FunctionSet.BITMAP_UNION_INT,
             FunctionSet.BITMAP_UNION_COUNT, "ndv_no_finalize", 
FunctionSet.WINDOW_FUNNEL, FunctionSet.RETENTION,
-            FunctionSet.SEQUENCE_MATCH, FunctionSet.SEQUENCE_COUNT, 
FunctionSet.MAP_AGG, FunctionSet.BITMAP_AGG);
+            FunctionSet.SEQUENCE_MATCH, FunctionSet.SEQUENCE_COUNT, 
FunctionSet.MAP_AGG, FunctionSet.BITMAP_AGG,
+            FunctionSet.ARRAY_AGG);
 
     public static ImmutableSet<String> 
ALWAYS_NULLABLE_AGGREGATE_FUNCTION_NAME_SET =
             ImmutableSet.of("stddev_samp", "variance_samp", "var_samp", 
"percentile_approx");
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
index cde3cecf17..e1c62cc739 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -572,6 +572,8 @@ public class FunctionSet<T> {
 
     public static final String GROUP_ARRAY = "group_array";
 
+    public static final String ARRAY_AGG = "array_agg";
+
     // Populate all the aggregate builtins in the catalog.
     // null symbols indicate the function does not need that step of the 
evaluation.
     // An empty symbol indicates a TODO for the BE to implement the function.
@@ -1402,6 +1404,9 @@ public class FunctionSet<T> {
                     AggregateFunction.createBuiltin(GROUP_ARRAY, 
Lists.newArrayList(t, Type.INT), new ArrayType(t),
                             t, "", "", "", "", "", true, false, true, true));
 
+            addBuiltin(AggregateFunction.createBuiltin(ARRAY_AGG, 
Lists.newArrayList(t), new ArrayType(t), t, "", "", "", "", "",
+                    true, false, true, true));
+
             //first_value/last_value for array
             addBuiltin(AggregateFunction.createAnalyticBuiltin("first_value",
                     Lists.newArrayList(new ArrayType(t)), new ArrayType(t), 
Type.ARRAY,
diff --git a/regression-test/data/query_p0/aggregate/array_agg.out 
b/regression-test/data/query_p0/aggregate/array_agg.out
new file mode 100644
index 0000000000..ee7260205a
--- /dev/null
+++ b/regression-test/data/query_p0/aggregate/array_agg.out
@@ -0,0 +1,67 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql1 --
+["LC", "LB", "alex"]
+["LC", "LB", "LA"]
+["LC", NULL, "LA"]
+["LC", "LB", "LA"]
+[NULL, "LC", "LB", "LA"]
+[NULL, "LC", "LC", "LC", "LC"]
+[NULL, "LC", "LC", "LC", "LC"]
+
+-- !sql2 --
+["alex", NULL, "LC", "LC", "LC", "LC"]
+["LB"]
+["LC"]
+["LA"]
+["LB"]
+["LC"]
+["LA"]
+["LC"]
+["LA"]
+["LB"]
+["LC"]
+["LA"]
+["LB"]
+[NULL, "LC"]
+[NULL, "LC", "LC"]
+[NULL, "LC", "LC"]
+
+-- !sql3 --
+["LC", "LB", "alex", "LC", "LB", "LA", "LC", NULL, "LA", "LC", "LB", "LA", 
NULL, "LC", "LB", "LA", NULL, "LC", "LC", "LC", "LC", NULL, "LC", "LC", "LC", 
"LC"]
+
+-- !sql4 --
+["V1_3", "V1_2", NULL, "V2_3", "V2_2", "V2_1", "V3_3", NULL, "V3_1", "V4_3", 
"V4_2", "V4_1", "V5_3", "V5_3", "V5_2", "V5_1", "V6_3", NULL, "V6_3", NULL, 
"V6_3", "V7_3", NULL, "V7_3", NULL, "V7_3"]
+
+-- !sql5 --
+1      [2, 1, NULL]
+2      [5, 5, 4]
+3      [NULL, 6, 6]
+4      [6, 6, 5]
+5      [NULL, 5, 6]
+6      [NULL, NULL, NULL, NULL, NULL]
+7      [NULL, NULL, NULL, NULL, NULL]
+
+-- !sql6 --
+[""]
+
+-- !sql7 --
+["LC", "LB", "alex"]
+["LC", "LB", "LA"]
+["LC", NULL, "LA"]
+["LC", "LB", "LA"]
+["LC", "LB", "LA"]
+[""]
+[""]
+
+-- !sql8 --
+[NULL]
+
+-- !sql9 --
+1      [1.11, NULL, NULL]
+2      [1.21, NULL, 1.23]
+3      [1.24, 1.23, 1.21]
+4      [1.22, NULL, 1.22]
+5      [NULL, NULL, NULL]
+7      [NULL]
+8      [NULL]
+
diff --git a/regression-test/suites/query_p0/aggregate/array_agg.groovy 
b/regression-test/suites/query_p0/aggregate/array_agg.groovy
new file mode 100644
index 0000000000..1463eed647
--- /dev/null
+++ b/regression-test/suites/query_p0/aggregate/array_agg.groovy
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("array_agg") {
+    sql "DROP TABLE IF EXISTS `test_array_agg`;"
+    sql "DROP TABLE IF EXISTS `test_array_agg_int`;"
+    sql "DROP TABLE IF EXISTS `test_array_agg_decimal`;"
+    sql """
+       CREATE TABLE `test_array_agg` (
+        `id` int(11) NOT NULL,
+        `label_name` varchar(32) default null,
+        `value_field` string default null,
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`id`)
+    COMMENT 'OLAP'
+    DISTRIBUTED BY HASH(`id`) BUCKETS 1
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1",
+    "storage_format" = "V2",
+    "light_schema_change" = "true",
+    "disable_auto_compaction" = "false",
+    "enable_single_replica_compaction" = "false"
+    );
+    """  
+
+    sql """
+    CREATE TABLE `test_array_agg_int` (
+        `id` int(11) NOT NULL,
+        `label_name` varchar(32) default null,
+        `value_field` string default null,
+        `age` int(11) default null
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`id`)
+    COMMENT 'OLAP'
+    DISTRIBUTED BY HASH(`id`) BUCKETS 1
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1",
+    "storage_format" = "V2",
+    "light_schema_change" = "true",
+    "disable_auto_compaction" = "false",
+    "enable_single_replica_compaction" = "false"
+    );
+    """  
+
+    sql """
+       CREATE TABLE `test_array_agg_decimal` (
+        `id` int(11) NOT NULL,
+        `label_name` varchar(32) default null,
+        `value_field` string default null,
+        `age` int(11) default null,
+         `o_totalprice` DECIMAL(15, 2) default NULL,
+         `label_name_not_null` varchar(32) not null
+    )ENGINE=OLAP
+    DUPLICATE KEY(`id`)
+    COMMENT 'OLAP'
+    DISTRIBUTED BY HASH(`id`) BUCKETS 1
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1",
+    "storage_format" = "V2",
+    "light_schema_change" = "true",
+    "disable_auto_compaction" = "false",
+    "enable_single_replica_compaction" = "false"
+    );
+    """  
+
+    sql """
+    insert into `test_array_agg` values
+    (1, "alex",NULL),
+    (1, "LB", "V1_2"),
+    (1, "LC", "V1_3"),
+    (2, "LA", "V2_1"),
+    (2, "LB", "V2_2"),
+    (2, "LC", "V2_3"),
+    (3, "LA", "V3_1"),
+    (3, NULL, NULL),
+    (3, "LC", "V3_3"),
+    (4, "LA", "V4_1"),
+    (4, "LB", "V4_2"),
+    (4, "LC", "V4_3"),
+    (5, "LA", "V5_1"),
+    (5, "LB", "V5_2"),
+    (5, "LC", "V5_3"),
+    (5, NULL, "V5_3"),
+    (6, "LC", "V6_3"),
+    (6, "LC", NULL),
+    (6, "LC", "V6_3"),
+    (6, "LC", NULL),
+    (6, NULL, "V6_3"),
+    (7, "LC", "V7_3"),
+    (7, "LC", NULL),
+    (7, "LC", "V7_3"),
+    (7, "LC", NULL),
+    (7, NULL, "V7_3");
+    """  
+    
+    sql """
+    insert into `test_array_agg_int` values
+    (1, "alex",NULL,NULL),
+    (1, "LB", "V1_2",1),
+    (1, "LC", "V1_3",2),
+    (2, "LA", "V2_1",4),
+    (2, "LB", "V2_2",5),
+    (2, "LC", "V2_3",5),
+    (3, "LA", "V3_1",6),
+    (3, NULL, NULL,6),
+    (3, "LC", "V3_3",NULL),
+    (4, "LA", "V4_1",5),
+    (4, "LB", "V4_2",6),
+    (4, "LC", "V4_3",6),
+    (5, "LA", "V5_1",6),
+    (5, "LB", "V5_2",5),
+    (5, "LC", "V5_3",NULL),
+    (6, "LC", "V6_3",NULL),
+    (6, "LC", NULL,NULL),
+    (6, "LC", "V6_3",NULL),
+    (6, "LC", NULL,NULL),
+    (6, NULL, "V6_3",NULL),
+    (7, "LC", "V7_3",NULL),
+    (7, "LC", NULL,NULL),
+    (7, "LC", "V7_3",NULL),
+    (7, "LC", NULL,NULL),
+    (7, NULL, "V7_3",NULL);
+    """  
+
+    sql """
+    insert into `test_array_agg_decimal` values
+    (1, "alex",NULL,NULL,NULL,"alex"),
+    (1, "LB", "V1_2",1,NULL,"alexxing"),
+    (1, "LC", "V1_3",2,1.11,"alexcoco"),
+    (2, "LA", "V2_1",4,1.23,"alex662"),
+    (2, "LB", "",5,NULL,""),
+    (2, "LC", "",5,1.21,"alexcoco1"),
+    (3, "LA", "V3_1",6,1.21,"alexcoco2"),
+    (3, NULL, NULL,6,1.23,"alexcoco3"),
+    (3, "LC", "V3_3",NULL,1.24,"alexcoco662"),
+    (4, "LA", "",5,1.22,"alexcoco662"),
+    (4, "LB", "V4_2",6,NULL,"alexcoco662"),
+    (4, "LC", "V4_3",6,1.22,"alexcoco662"),
+    (5, "LA", "V5_1",6,NULL,"alexcoco662"),
+    (5, "LB", "V5_2",5,NULL,"alexcoco662"),
+    (5, "LC", "V5_3",NULL,NULL,"alexcoco662"),
+    (7, "", NULL,NULL,NULL,"alexcoco1"),
+    (8, "", NULL,0,NULL,"alexcoco2");
+    """ 
+
+    qt_sql1 """
+    SELECT array_agg(`label_name`) FROM `test_array_agg` GROUP BY `id` order 
by id;
+    """
+    qt_sql2 """
+    SELECT array_agg(label_name) FROM `test_array_agg` GROUP BY value_field 
order by value_field;
+    """
+    qt_sql3 """
+    SELECT array_agg(`label_name`) FROM `test_array_agg`;
+    """
+    qt_sql4 """
+    SELECT array_agg(`value_field`) FROM `test_array_agg`;
+    """
+    qt_sql5 """
+    SELECT id, array_agg(age) FROM test_array_agg_int GROUP BY id order by id;
+    """
+
+    qt_sql6 """
+    select array_agg(label_name) from test_array_agg_decimal where id=7;
+    """
+
+    qt_sql7 """
+    select array_agg(label_name) from test_array_agg_decimal group by id order 
by id;
+    """
+
+    qt_sql8 """
+    select array_agg(age) from test_array_agg_decimal where id=7;
+    """
+
+    qt_sql9 """
+    select id,array_agg(o_totalprice) from test_array_agg_decimal group by id 
order by id;
+    """
+
+    sql "DROP TABLE `test_array_agg`"
+    sql "DROP TABLE `test_array_agg_int`"
+    sql "DROP TABLE `test_array_agg_decimal`"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to