This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 1c3cc77a54 [fix](function) to_bitmap parameter parsing failure returns
null instead of bitmap_empty (#21236)
1c3cc77a54 is described below
commit 1c3cc77a54938ed948ad8186b8dea8385977d23c
Author: ZenoYang <[email protected]>
AuthorDate: Fri Aug 18 14:37:49 2023 +0800
[fix](function) to_bitmap parameter parsing failure returns null instead of
bitmap_empty (#21236)
* [fix](function) to_bitmap parameter parsing failure returns null instead
of bitmap_empty
* add ut
* fix nereids
* fix regression-test
---
be/src/util/bitmap_value.h | 18 ++++++
be/src/vec/functions/function_bitmap.cpp | 71 ++++++++--------------
be/test/vec/function/function_bitmap_test.cpp | 27 ++++++++
be/test/vec/function/function_test_util.h | 22 +++++++
.../expressions/functions/scalar/ToBitmap.java | 4 +-
gensrc/script/doris_builtins_functions.py | 6 +-
.../data/nereids_function_p0/scalar_function/B.out | 6 +-
.../bitmap_functions/test_bitmap_function.out | 2 +-
.../bitmap_functions/test_bitmap_function.out | 2 +-
9 files changed, 101 insertions(+), 57 deletions(-)
diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h
index 52c4caaa41..ec75c4141c 100644
--- a/be/src/util/bitmap_value.h
+++ b/be/src/util/bitmap_value.h
@@ -1846,6 +1846,24 @@ public:
return *this;
}
+ bool operator==(const BitmapValue& rhs) const {
+ if (_type != rhs._type) return false;
+
+ switch (_type) {
+ case BitmapValue::BitmapDataType::EMPTY:
+ return true;
+ case BitmapValue::BitmapDataType::SINGLE:
+ return _sv == rhs._sv;
+ case BitmapValue::BitmapDataType::BITMAP:
+ return _bitmap == rhs._bitmap;
+ case BitmapValue::BitmapDataType::SET:
+ return _set == rhs._set;
+ default:
+ CHECK(false) << _type;
+ }
+ return false;
+ }
+
// check if value x is present
bool contains(uint64_t x) const {
switch (_type) {
diff --git a/be/src/vec/functions/function_bitmap.cpp
b/be/src/vec/functions/function_bitmap.cpp
index 578ce0e34d..5b03abdcbe 100644
--- a/be/src/vec/functions/function_bitmap.cpp
+++ b/be/src/vec/functions/function_bitmap.cpp
@@ -86,55 +86,31 @@ struct BitmapEmpty {
struct ToBitmap {
static constexpr auto name = "to_bitmap";
- using ReturnType = DataTypeBitMap;
-
- template <typename ColumnType>
- static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
- execute<ColumnType, false>(col, nullptr, col_res);
- }
- template <typename ColumnType>
- static void vector_nullable(const ColumnType* col, const NullMap& nullmap,
- MutableColumnPtr& col_res) {
- execute<ColumnType, true>(col, &nullmap, col_res);
- }
- template <typename ColumnType, bool arg_is_nullable>
- static void execute(const ColumnType* col, const NullMap* nullmap,
MutableColumnPtr& col_res) {
- if constexpr (std::is_same_v<ColumnType, ColumnString>) {
- const ColumnString::Chars& data = col->get_chars();
- const ColumnString::Offsets& offsets = col->get_offsets();
-
- auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
- auto& res_data = res_column->get_data();
- size_t size = offsets.size();
-
- for (size_t i = 0; i < size; ++i) {
- if (arg_is_nullable && ((*nullmap)[i])) {
- continue;
- } else {
- const char* raw_str = reinterpret_cast<const
char*>(&data[offsets[i - 1]]);
- size_t str_size = offsets[i] - offsets[i - 1];
- StringParser::ParseResult parse_result =
StringParser::PARSE_SUCCESS;
- uint64_t int_value =
StringParser::string_to_unsigned_int<uint64_t>(
- raw_str, str_size, &parse_result);
- if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
- res_data[i].add(int_value);
- }
- }
- }
- } else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
- auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
- auto& res_data = res_column->get_data();
- size_t size = col->size();
+ using ArgumentType = DataTypeString;
- for (size_t i = 0; i < size; ++i) {
- if constexpr (arg_is_nullable) {
- if ((*nullmap)[i]) {
- continue;
- }
- }
- res_data[i].add(col->get_data()[i]);
+ static Status vector(const ColumnString::Chars& data, const
ColumnString::Offsets& offsets,
+ std::vector<BitmapValue>& res_data, NullMap& null_map,
+ size_t input_rows_count) {
+ res_data.resize(input_rows_count);
+ if (offsets.size() == 0 && input_rows_count == 1) {
+ // For NULL constant
+ res_data.emplace_back();
+ null_map[0] = 1;
+ return Status::OK();
+ }
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ const char* raw_str = reinterpret_cast<const
char*>(&data[offsets[i - 1]]);
+ size_t str_size = offsets[i] - offsets[i - 1];
+ StringParser::ParseResult parse_result =
StringParser::PARSE_SUCCESS;
+ uint64_t int_value =
StringParser::string_to_unsigned_int<uint64_t>(raw_str, str_size,
+
&parse_result);
+ if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
+ res_data[i].add(int_value);
+ } else {
+ null_map[i] = 1;
}
}
+ return Status::OK();
}
};
@@ -311,6 +287,7 @@ public:
auto& res = res_data_column->get_data();
ColumnPtr& argument_column =
block.get_by_position(arguments[0]).column;
+ // todo(zeno) int can avoid cast into string or char
if constexpr (std::is_same_v<typename Impl::ArgumentType,
DataTypeString>) {
const auto& str_column = static_cast<const
ColumnString&>(*argument_column);
const ColumnString::Chars& data = str_column.get_chars();
@@ -1105,7 +1082,7 @@ public:
};
using FunctionBitmapEmpty = FunctionConst<BitmapEmpty, false>;
-using FunctionToBitmap = FunctionAlwaysNotNullable<ToBitmap>;
+using FunctionToBitmap = FunctionBitmapAlwaysNull<ToBitmap>;
using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable<ToBitmapWithCheck,
true>;
using FunctionBitmapFromString = FunctionBitmapAlwaysNull<BitmapFromString>;
diff --git a/be/test/vec/function/function_bitmap_test.cpp
b/be/test/vec/function/function_bitmap_test.cpp
index 689199e5c5..d8b19e5c78 100644
--- a/be/test/vec/function/function_bitmap_test.cpp
+++ b/be/test/vec/function/function_bitmap_test.cpp
@@ -27,6 +27,7 @@
#include "testutil/any_type.h"
#include "util/bitmap_value.h"
#include "vec/core/types.h"
+#include "vec/data_types/data_type_bitmap.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
@@ -207,4 +208,30 @@ TEST(function_bitmap_test, function_bitmap_has_all) {
check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
}
+TEST(function_bitmap_test, function_to_bitmap) {
+ std::string func_name = "to_bitmap";
+ InputTypeSet input_types = {TypeIndex::String};
+
+ BitmapValue bitmap1(1);
+ DataSet data_set = {{{std::string("1")}, &bitmap1},
+ {{std::string("-1")}, Null()},
+ {{std::string("1.11")}, Null()},
+ {{Null()}, Null()}};
+
+ check_function<DataTypeBitMap, true>(func_name, input_types, data_set);
+}
+
+TEST(function_bitmap_test, function_bitmap_from_string) {
+ std::string func_name = "bitmap_from_string";
+ InputTypeSet input_types = {TypeIndex::String};
+
+ BitmapValue bitmap1(1);
+ DataSet data_set = {{{std::string("1")}, &bitmap1},
+ {{std::string("-1")}, Null()},
+ {{std::string("1.11")}, Null()},
+ {{Null()}, Null()}};
+
+ check_function<DataTypeBitMap, true>(func_name, input_types, data_set);
+}
+
} // namespace doris::vectorized
diff --git a/be/test/vec/function/function_test_util.h
b/be/test/vec/function/function_test_util.h
index c4e3f4379d..9ca385b154 100644
--- a/be/test/vec/function/function_test_util.h
+++ b/be/test/vec/function/function_test_util.h
@@ -43,6 +43,7 @@
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_bitmap.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/functions/simple_function_factory.h"
@@ -290,6 +291,17 @@ Status check_function(const std::string& func_name, const
InputTypeSet& input_ty
ColumnPtr column = block.get_columns()[result];
EXPECT_TRUE(column != nullptr);
+ ColumnPtr nested_col = nullptr;
+ ColumnPtr null_map_col = ColumnUInt8::create(row_size, 0);
+ if (doris::vectorized::is_column_nullable(*column)) {
+ const auto& null_column = check_and_get_column<ColumnNullable>(column);
+ nested_col = null_column->get_nested_column_ptr();
+ null_map_col = null_column->get_null_map_column_ptr();
+ } else {
+ nested_col = column;
+ }
+ const auto& null_map =
check_and_get_column<ColumnUInt8>(null_map_col)->get_data();
+
for (int i = 0; i < row_size; ++i) {
auto check_column_data = [&]() {
if constexpr (std::is_same_v<ReturnType, DataTypeJsonb>) {
@@ -303,6 +315,16 @@ Status check_function(const std::string& func_name, const
InputTypeSet& input_ty
EXPECT_EQ(expect_data,
JsonbToJson::jsonb_to_json_string(s.data, s.size))
<< " at row " << i;
}
+ } else if constexpr (std::is_same_v<ReturnType, DataTypeBitMap>) {
+ const auto* expect_data =
+ any_cast<typename
ReturnType::FieldType*>(data_set[i].second);
+ if (null_map[i]) {
+ EXPECT_TRUE(expect_data->empty()) << " at row " << i;
+ } else {
+ auto& bitmap_value =
+
check_and_get_column<ColumnBitmap>(nested_col)->get_data()[i];
+ EXPECT_EQ(*expect_data, bitmap_value) << " at row " << i;
+ }
} else {
Field field;
column->get(i, field);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToBitmap.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToBitmap.java
index cf833ed247..5a3e9647f8 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToBitmap.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToBitmap.java
@@ -19,7 +19,7 @@ package
org.apache.doris.nereids.trees.expressions.functions.scalar;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
-import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable;
+import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
@@ -36,7 +36,7 @@ import java.util.List;
* ScalarFunction 'to_bitmap'. This class is generated by GenerateFunction.
*/
public class ToBitmap extends ScalarFunction
- implements UnaryExpression, ExplicitlyCastableSignature,
AlwaysNotNullable {
+ implements UnaryExpression, ExplicitlyCastableSignature,
AlwaysNullable {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(BitmapType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT),
diff --git a/gensrc/script/doris_builtins_functions.py
b/gensrc/script/doris_builtins_functions.py
index 309e5f4c77..72ef074d7d 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1758,11 +1758,11 @@ visible_functions = {
#bitmap function
"Bitmap": [
- [['to_bitmap'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'],
+ [['to_bitmap'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NULLABLE'],
[['to_bitmap_with_check'], 'BITMAP', ['VARCHAR'],
'ALWAYS_NOT_NULLABLE'],
- [['to_bitmap'], 'BITMAP', ['STRING'], 'ALWAYS_NOT_NULLABLE'],
+ [['to_bitmap'], 'BITMAP', ['STRING'], 'ALWAYS_NULLABLE'],
[['to_bitmap_with_check'], 'BITMAP', ['STRING'],
'ALWAYS_NOT_NULLABLE'],
- [['to_bitmap'], 'BITMAP', ['BIGINT'], 'ALWAYS_NOT_NULLABLE'],
+ [['to_bitmap'], 'BITMAP', ['BIGINT'], 'ALWAYS_NULLABLE'],
[['to_bitmap_with_check'], 'BITMAP', ['BIGINT'],
'ALWAYS_NOT_NULLABLE'],
[['bitmap_hash'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'],
[['bitmap_hash64'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'],
diff --git a/regression-test/data/nereids_function_p0/scalar_function/B.out
b/regression-test/data/nereids_function_p0/scalar_function/B.out
index 04a79b36a7..f1ec59eb04 100644
--- a/regression-test/data/nereids_function_p0/scalar_function/B.out
+++ b/regression-test/data/nereids_function_p0/scalar_function/B.out
@@ -319,7 +319,7 @@ true
\N
-- !sql_bitmap_has_all_Bitmap_Bitmap --
-true
+\N
true
true
true
@@ -348,7 +348,7 @@ true
true
-- !sql_bitmap_has_any_Bitmap_Bitmap --
-false
+\N
true
true
true
@@ -696,7 +696,7 @@ true
\N
-- !sql_bitmap_to_string_Bitmap --
-
+\N
1
2
3
diff --git
a/regression-test/data/nereids_p0/sql_functions/bitmap_functions/test_bitmap_function.out
b/regression-test/data/nereids_p0/sql_functions/bitmap_functions/test_bitmap_function.out
index d344ad27bf..0f784e635c 100644
---
a/regression-test/data/nereids_p0/sql_functions/bitmap_functions/test_bitmap_function.out
+++
b/regression-test/data/nereids_p0/sql_functions/bitmap_functions/test_bitmap_function.out
@@ -406,7 +406,7 @@ true
1
-- !sql --
-
+\N
-- !sql --
\N
diff --git
a/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out
b/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out
index 4eeae0ccd9..885a24ddee 100644
---
a/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out
+++
b/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out
@@ -406,7 +406,7 @@ true
1
-- !sql --
-
+\N
-- !sql --
\N
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]