This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 799cb8edfb [fix](func) fix size function for array map (#23966)
799cb8edfb is described below
commit 799cb8edfbf295df1fedd7f7978290ff41661915
Author: amory <[email protected]>
AuthorDate: Wed Sep 6 20:24:38 2023 +0800
[fix](func) fix size function for array map (#23966)
---
.../functions/array/function_array_register.cpp | 2 -
be/src/vec/functions/array/function_array_size.cpp | 30 ------
be/src/vec/functions/array/function_array_size.h | 89 -----------------
be/src/vec/functions/function_map.cpp | 59 ------------
be/src/vec/functions/function_size.cpp | 107 +++++++++++++++++++++
be/src/vec/functions/simple_function_factory.h | 2 +
.../size_funciton/test_size_function.out | 31 ++++++
.../size_funciton/test_size_function.groovy | 56 +++++++++++
8 files changed, 196 insertions(+), 180 deletions(-)
diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_register.cpp
index c77bc0686d..02ad9cfe05 100644
--- a/be/src/vec/functions/array/function_array_register.cpp
+++ b/be/src/vec/functions/array/function_array_register.cpp
@@ -26,7 +26,6 @@ void register_function_array_shuffle(SimpleFunctionFactory&);
void register_function_array_exists(SimpleFunctionFactory&);
void register_function_array_element(SimpleFunctionFactory&);
void register_function_array_index(SimpleFunctionFactory&);
-void register_function_array_size(SimpleFunctionFactory&);
void register_function_array_aggregation(SimpleFunctionFactory&);
void register_function_array_distinct(SimpleFunctionFactory&);
void register_function_array_remove(SimpleFunctionFactory&);
@@ -60,7 +59,6 @@ void register_function_array(SimpleFunctionFactory& factory) {
register_function_array_exists(factory);
register_function_array_element(factory);
register_function_array_index(factory);
- register_function_array_size(factory);
register_function_array_aggregation(factory);
register_function_array_distinct(factory);
register_function_array_remove(factory);
diff --git a/be/src/vec/functions/array/function_array_size.cpp b/be/src/vec/functions/array/function_array_size.cpp
deleted file mode 100644
index 3c1fc42025..0000000000
--- a/be/src/vec/functions/array/function_array_size.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "vec/functions/array/function_array_size.h"
-
-#include "vec/functions/simple_function_factory.h"
-
-namespace doris::vectorized {
-
-void register_function_array_size(SimpleFunctionFactory& factory) {
- factory.register_function<FunctionArraySize>();
- factory.register_alias(FunctionArraySize::name, "cardinality");
- factory.register_alias(FunctionArraySize::name, "array_size");
-}
-
-} // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_size.h b/be/src/vec/functions/array/function_array_size.h
deleted file mode 100644
index deb46970b8..0000000000
--- a/be/src/vec/functions/array/function_array_size.h
+++ /dev/null
@@ -1,89 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <glog/logging.h>
-#include <sys/types.h>
-
-#include <algorithm>
-#include <boost/iterator/iterator_facade.hpp>
-#include <memory>
-#include <ostream>
-#include <utility>
-
-#include "common/status.h"
-#include "vec/columns/column.h"
-#include "vec/columns/column_array.h"
-#include "vec/columns/column_vector.h"
-#include "vec/columns/columns_number.h"
-#include "vec/core/block.h"
-#include "vec/core/column_numbers.h"
-#include "vec/core/column_with_type_and_name.h"
-#include "vec/core/types.h"
-#include "vec/data_types/data_type.h"
-#include "vec/data_types/data_type_number.h"
-#include "vec/functions/function.h"
-
-namespace doris {
-class FunctionContext;
-} // namespace doris
-
-namespace doris::vectorized {
-
-class FunctionArraySize : public IFunction {
-public:
- static constexpr auto name = "size";
- static FunctionPtr create() { return std::make_shared<FunctionArraySize>(); }
-
- /// Get function name.
- String get_name() const override { return name; }
-
- bool is_variadic() const override { return false; }
-
- size_t get_number_of_arguments() const override { return 1; }
-
- DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
- DCHECK(is_array(arguments[0]))
- << "first argument for function: " << name << " should be DataTypeArray";
- return std::make_shared<DataTypeInt64>();
- }
-
- Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
- size_t result, size_t input_rows_count) override {
- auto left_column =
- block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
- const auto array_column = check_and_get_column<ColumnArray>(*left_column);
- if (!array_column) {
- return Status::RuntimeError("unsupported types for function {}({})", get_name(),
- block.get_by_position(arguments[0]).type->get_name());
- }
- const auto& offsets = array_column->get_offsets();
-
- auto dst_column = ColumnInt64::create(input_rows_count);
- auto& dst_data = dst_column->get_data();
-
- for (ssize_t i = 0; i < offsets.size(); ++i) {
- dst_data[i] = offsets[i] - offsets[i - 1];
- }
-
- block.replace_by_position(result, std::move(dst_column));
- return Status::OK();
- }
-};
-
-} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_map.cpp b/be/src/vec/functions/function_map.cpp
index 5206b4f078..a71c19ec45 100644
--- a/be/src/vec/functions/function_map.cpp
+++ b/be/src/vec/functions/function_map.cpp
@@ -146,64 +146,6 @@ public:
}
};
-class FunctionMapSize : public IFunction {
-public:
- static constexpr auto name = "map_size";
- static FunctionPtr create() { return std::make_shared<FunctionMapSize>(); }
-
- /// Get function name.
- String get_name() const override { return name; }
-
- bool is_variadic() const override { return false; }
-
- size_t get_number_of_arguments() const override { return 1; }
-
- DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
- DataTypePtr datatype = arguments[0];
- if (datatype->is_nullable()) {
- datatype = assert_cast<const DataTypeNullable*>(datatype.get())->get_nested_type();
- }
- DCHECK(is_map(datatype)) << "first argument for function: " << name
- << " should be DataTypeMap";
- return std::make_shared<DataTypeInt64>();
- }
-
- Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
- size_t result, size_t input_rows_count) override {
- const auto& [left_column, left_const] =
- unpack_if_const(block.get_by_position(arguments[0]).column);
- const ColumnMap* map_column = nullptr;
- // const UInt8* map_null_map = nullptr;
- if (left_column->is_nullable()) {
- auto nullable_column = reinterpret_cast<const ColumnNullable*>(left_column.get());
- map_column = check_and_get_column<ColumnMap>(nullable_column->get_nested_column());
- // map_null_map = nullable_column->get_null_map_column().get_data().data();
- } else {
- map_column = check_and_get_column<ColumnMap>(*left_column.get());
- }
- if (!map_column) {
- return Status::RuntimeError("unsupported types for function {}({})", get_name(),
- block.get_by_position(arguments[0]).type->get_name());
- }
-
- auto dst_column = ColumnInt64::create(input_rows_count);
- auto& dst_data = dst_column->get_data();
-
- if (left_const) {
- for (size_t i = 0; i < map_column->size(); i++) {
- dst_data[i] = map_column->size_at(0);
- }
- } else {
- for (size_t i = 0; i < map_column->size(); i++) {
- dst_data[i] = map_column->size_at(i);
- }
- }
-
- block.replace_by_position(result, std::move(dst_column));
- return Status::OK();
- }
-};
-
template <bool is_key>
class FunctionMapContains : public IFunction {
public:
@@ -354,7 +296,6 @@ public:
void register_function_map(SimpleFunctionFactory& factory) {
factory.register_function<FunctionMap>();
- factory.register_function<FunctionMapSize>();
factory.register_function<FunctionMapContains<true>>();
factory.register_function<FunctionMapContains<false>>();
factory.register_function<FunctionMapEntries<true>>();
diff --git a/be/src/vec/functions/function_size.cpp b/be/src/vec/functions/function_size.cpp
new file mode 100644
index 0000000000..988f909443
--- /dev/null
+++ b/be/src/vec/functions/function_size.cpp
@@ -0,0 +1,107 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "simple_function_factory.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_map.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/array/function_array_utils.h"
+#include "vec/functions/function.h"
+#include "vec/functions/function_helpers.h"
+
+namespace doris::vectorized {
+
+// size function for size with map and array
+class FunctionSize : public IFunction {
+public:
+ static constexpr auto name = "size";
+ static FunctionPtr create() { return std::make_shared<FunctionSize>(); }
+ String get_name() const override { return name; }
+ bool is_variadic() const override { return true; }
+ size_t get_number_of_arguments() const override { return 0; }
+
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+ DataTypePtr datatype = arguments[0];
+ if (datatype->is_nullable()) {
+ datatype = assert_cast<const DataTypeNullable*>(datatype.get())->get_nested_type();
+ }
+ DCHECK(is_map(datatype) || is_array(datatype)) << "first argument for function: " << name
+ << " should be DataTypeMap or DataTypeArray";
+ return std::make_shared<DataTypeInt64>();
+ }
+
+ Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+ size_t result, size_t input_rows_count) override {
+ const auto& [left_column, left_const] =
+ unpack_if_const(block.get_by_position(arguments[0]).column);
+ const auto type = block.get_by_position(arguments[0]).type;
+ const ColumnArray* array_column = nullptr;
+ const ColumnMap* map_column = nullptr;
+ if (is_array(type)) {
+ if (left_column->is_nullable()) {
+ auto nullable_column = reinterpret_cast<const ColumnNullable*>(left_column.get());
+ array_column =
+ check_and_get_column<ColumnArray>(nullable_column->get_nested_column());
+ } else {
+ array_column = check_and_get_column<ColumnArray>(*left_column.get());
+ }
+ } else if (is_map(type)) {
+ if (left_column->is_nullable()) {
+ auto nullable_column = reinterpret_cast<const ColumnNullable*>(left_column.get());
+ map_column = check_and_get_column<ColumnMap>(nullable_column->get_nested_column());
+ } else {
+ map_column = check_and_get_column<ColumnMap>(*left_column.get());
+ }
+ }
+
+ auto dst_column = ColumnInt64::create(input_rows_count);
+ auto& dst_data = dst_column->get_data();
+
+ if (left_const && map_column) {
+ for (size_t i = 0; i < map_column->size(); i++) {
+ dst_data[i] = map_column->size_at(0);
+ }
+ } else if (left_const && array_column) {
+ for (size_t i = 0; i < array_column->size(); i++) {
+ dst_data[i] = array_column->size_at(0);
+ }
+ } else if (map_column) {
+ for (size_t i = 0; i < map_column->size(); i++) {
+ dst_data[i] = map_column->size_at(i);
+ }
+ } else if (array_column) {
+ for (size_t i = 0; i < array_column->size(); i++) {
+ dst_data[i] = array_column->size_at(i);
+ }
+ } else {
+ return Status::RuntimeError("unsupported types for function {}({})", get_name(),
+ block.get_by_position(arguments[0]).type->get_name());
+ }
+
+ block.replace_by_position(result, std::move(dst_column));
+ return Status::OK();
+ }
+};
+
+void register_function_size(SimpleFunctionFactory& factory) {
+ factory.register_function<FunctionSize>();
+ factory.register_alias(FunctionSize::name, "map_size");
+ factory.register_alias(FunctionSize::name, "cardinality");
+ factory.register_alias(FunctionSize::name, "array_size");
+}
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h
index b681d61db4..04ecabd142 100644
--- a/be/src/vec/functions/simple_function_factory.h
+++ b/be/src/vec/functions/simple_function_factory.h
@@ -32,6 +32,7 @@ namespace doris::vectorized {
class SimpleFunctionFactory;
+void register_function_size(SimpleFunctionFactory& factory);
void register_function_comparison(SimpleFunctionFactory& factory);
void register_function_comparison_eq_for_null(SimpleFunctionFactory& factory);
void register_function_hll_cardinality(SimpleFunctionFactory& factory);
@@ -206,6 +207,7 @@ public:
static std::once_flag oc;
static SimpleFunctionFactory instance;
std::call_once(oc, []() {
+ register_function_size(instance);
register_function_bitmap(instance);
register_function_quantile_state(instance);
register_function_bitmap_variadic(instance);
diff --git a/regression-test/data/query_p0/sql_functions/size_funciton/test_size_function.out b/regression-test/data/query_p0/sql_functions/size_funciton/test_size_function.out
new file mode 100644
index 0000000000..0c31b5dab0
--- /dev/null
+++ b/regression-test/data/query_p0/sql_functions/size_funciton/test_size_function.out
@@ -0,0 +1,31 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+4 ["aaa", "bbb", NULL, "fff"] ["aaa", "bbb", NULL, "fff"]
+
+-- !sql --
+4 ["2020-01-02", "2021-01-01", "2022-01-03", "1996-04-17"] ["2020-01-02", "2021-01-01", "2022-01-03", "1996-04-17"]
+
+-- !sql --
+4 ["aaa", "bbb", NULL, "fff"] ["aaa", "bbb", NULL, "fff"]
+
+-- !sql --
+4 ["2020-01-02", "2021-01-01", "2022-01-03", "1996-04-17"] ["2020-01-02", "2021-01-01", "2022-01-03", "1996-04-17"]
+
+-- !sql --
+4 ["aaa", "bbb", NULL, "fff"] ["aaa", "bbb", NULL, "fff"]
+
+-- !sql --
+4 ["2020-01-02", "2021-01-01", "2022-01-03", "1996-04-17"] ["2020-01-02", "2021-01-01", "2022-01-03", "1996-04-17"]
+
+-- !sql --
+2
+
+-- !sql --
+2
+
+-- !select_00 --
+1 3 3
+5 3 0
+5 3 3
+4 3 2
+
diff --git a/regression-test/suites/query_p0/sql_functions/size_funciton/test_size_function.groovy b/regression-test/suites/query_p0/sql_functions/size_funciton/test_size_function.groovy
new file mode 100644
index 0000000000..fe2607ecb1
--- /dev/null
+++ b/regression-test/suites/query_p0/sql_functions/size_funciton/test_size_function.groovy
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_size_function") {
+ sql """ set enable_nereids_planner = false; """
+ // literal
+
+ qt_sql "SELECT size(array_shuffle(['aaa', null, 'bbb', 'fff'])), array_shuffle(['aaa', null, 'bbb', 'fff'], 0), shuffle(['aaa', null, 'bbb', 'fff'], 0)"
+ qt_sql """select size(array("2020-01-02", "2022-01-03", "2021-01-01", "1996-04-17")), array_shuffle(array("2020-01-02", "2022-01-03", "2021-01-01", "1996-04-17"), 0), shuffle(array("2020-01-02", "2022-01-03", "2021-01-01", "1996-04-17"), 0)"""
+ qt_sql "SELECT array_size(array_shuffle(['aaa', null, 'bbb', 'fff'])), array_shuffle(['aaa', null, 'bbb', 'fff'], 0), shuffle(['aaa', null, 'bbb', 'fff'], 0)"
+ qt_sql """select array_size(array("2020-01-02", "2022-01-03", "2021-01-01", "1996-04-17")), array_shuffle(array("2020-01-02", "2022-01-03", "2021-01-01", "1996-04-17"), 0), shuffle(array("2020-01-02", "2022-01-03", "2021-01-01", "1996-04-17"), 0)"""
+ qt_sql "SELECT (cardinality(['aaa', null, 'bbb', 'fff'])), array_shuffle(['aaa', null, 'bbb', 'fff'], 0), shuffle(['aaa', null, 'bbb', 'fff'], 0)"
+ qt_sql """select cardinality(array("2020-01-02", "2022-01-03", "2021-01-01", "1996-04-17")), array_shuffle(array("2020-01-02", "2022-01-03", "2021-01-01", "1996-04-17"), 0), shuffle(array("2020-01-02", "2022-01-03", "2021-01-01", "1996-04-17"), 0)"""
+
+ qt_sql "SELECT size(map(1,2,2,null));"
+ qt_sql "SELECT map_size(map(1,2,2,null));"
+
+ // table
+ def tableName = "test_size"
+ sql "DROP TABLE IF EXISTS ${tableName}"
+ sql """
+ CREATE TABLE IF NOT EXISTS `${tableName}` (
+ `id` int(11) NULL,
+ `c_array1` array<int(11)> NULL,
+ `c_array2` array<varchar(20)> NULL,
+ `c_map` map<int(11), string> NULL,
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2"
+ )
+ """
+ sql """INSERT INTO ${tableName} values
+ (0, [2], ['123', '124', '125'], {1: "", null: "a", 2: "b"}),
+ (1, [1,2,3,4,5], ['234', '124', '125'], {}),
+ (2, [1,2,10,12,10], ['345', '234', '123'], {1: "a", 2: "b", 3: "c"}),
+ (3, [1,3,4,2], ['222', '444', '555'], {11: NULL, 0:"ss"});
+ """
+ qt_select_00 " select size(c_array1), size(c_array2), size(c_map) from ${tableName} order by id;"
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]