This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new be55cb8dfc [Improve](jsonb_extract) support jsonb_extract multi parse
path (#21555)
be55cb8dfc is described below
commit be55cb8dfc4da4fefc25eb0f538a71454580b5e9
Author: amory <[email protected]>
AuthorDate: Wed Jul 12 21:37:36 2023 +0800
[Improve](jsonb_extract) support jsonb_extract multi parse path (#21555)
support jsonb_extract multi parse path
---
be/src/vec/functions/function_jsonb.cpp | 154 +++++++++++++++------
gensrc/script/doris_builtins_functions.py | 8 +-
.../data/jsonb_p0/test_jsonb_load_and_function.out | 23 +++
.../test_jsonb_load_unique_key_and_function.out | 23 +++
.../jsonb_p0/test_jsonb_load_and_function.groovy | 3 +
.../test_jsonb_load_unique_key_and_function.groovy | 4 +
6 files changed, 171 insertions(+), 44 deletions(-)
diff --git a/be/src/vec/functions/function_jsonb.cpp
b/be/src/vec/functions/function_jsonb.cpp
index 42bdbe4b85..d9e498b4cf 100644
--- a/be/src/vec/functions/function_jsonb.cpp
+++ b/be/src/vec/functions/function_jsonb.cpp
@@ -339,7 +339,7 @@ using FunctionJsonbParseNotnullErrorValue =
using FunctionJsonbParseNotnullErrorInvalid =
FunctionJsonbParseBase<NullalbeMode::NOT_NULL,
JsonbParseErrorMode::RETURN_INVALID>;
-// func(json,string) -> nullable(type)
+// func(jsonb, [varchar, varchar, ...]) -> nullable(type)
template <typename Impl>
class FunctionJsonbExtract : public IFunction {
public:
@@ -347,7 +347,8 @@ public:
static constexpr auto alias = Impl::alias;
static FunctionPtr create() { return
std::make_shared<FunctionJsonbExtract>(); }
String get_name() const override { return name; }
- size_t get_number_of_arguments() const override { return 2; }
+ bool is_variadic() const override { return true; }
+ size_t get_number_of_arguments() const override { return 0; }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
return make_nullable(std::make_shared<typename Impl::ReturnType>());
}
@@ -355,26 +356,32 @@ public:
Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
size_t result, size_t input_rows_count) override {
auto null_map = ColumnUInt8::create(input_rows_count, 0);
- DCHECK_EQ(arguments.size(), 2);
- ColumnPtr argument_columns[2];
- bool col_const[2];
- for (int i = 0; i < 2; ++i) {
- std::tie(argument_columns[i], col_const[i]) =
-
unpack_if_const(block.get_by_position(arguments[i]).column);
- check_set_nullable(argument_columns[i], null_map, col_const[i]);
+ DCHECK_GE(arguments.size(), 2);
+
+ ColumnPtr jsonb_data_column;
+ bool jsonb_data_const = false;
+ // prepare jsonb data column
+ std::tie(jsonb_data_column, jsonb_data_const) =
+ unpack_if_const(block.get_by_position(arguments[0]).column);
+ check_set_nullable(jsonb_data_column, null_map, jsonb_data_const);
+ auto& ldata = assert_cast<const
ColumnString*>(jsonb_data_column.get())->get_chars();
+ auto& loffsets = assert_cast<const
ColumnString*>(jsonb_data_column.get())->get_offsets();
+
+ // prepare parse path column prepare
+ std::vector<const ColumnString*> jsonb_path_columns;
+ std::vector<bool> path_const(arguments.size() - 1);
+ for (int i = 0; i < arguments.size() - 1; ++i) {
+ ColumnPtr path_column;
+ bool is_const = false;
+ std::tie(path_column, is_const) =
+ unpack_if_const(block.get_by_position(arguments[i +
1]).column);
+ path_const[i] = is_const;
+ check_set_nullable(path_column, null_map, path_const[i]);
+ jsonb_path_columns.push_back(assert_cast<const
ColumnString*>(path_column.get()));
}
auto res = Impl::ColumnType::create();
- auto jsonb_data_column = assert_cast<const
ColumnString*>(argument_columns[0].get());
- auto jsonb_path_column = assert_cast<const
ColumnString*>(argument_columns[1].get());
-
- auto& ldata = jsonb_data_column->get_chars();
- auto& loffsets = jsonb_data_column->get_offsets();
-
- auto& rdata = jsonb_path_column->get_chars();
- auto& roffsets = jsonb_path_column->get_offsets();
-
bool is_invalid_json_path = false;
// execute Impl
@@ -382,36 +389,34 @@ public:
std::is_same_v<typename Impl::ReturnType,
DataTypeJsonb>) {
auto& res_data = res->get_chars();
auto& res_offsets = res->get_offsets();
- if (col_const[0]) {
- Impl::scalar_vector(context,
jsonb_data_column->get_data_at(0), rdata, roffsets,
- res_data, res_offsets,
null_map->get_data(),
- is_invalid_json_path);
- } else if (col_const[1]) {
- Impl::vector_scalar(context, ldata, loffsets,
jsonb_path_column->get_data_at(0),
- res_data, res_offsets,
null_map->get_data(),
- is_invalid_json_path);
- } else {
- Impl::vector_vector(context, ldata, loffsets, rdata, roffsets,
res_data,
- res_offsets, null_map->get_data(),
is_invalid_json_path);
+ Status st = Impl::vector_vector_v2(
+ context, ldata, loffsets, jsonb_data_const,
jsonb_path_columns, path_const,
+ res_data, res_offsets, null_map->get_data(),
is_invalid_json_path);
+ if (st != Status::OK()) {
+ return st;
}
} else {
- if (col_const[0]) {
+ // not support other extract type for now (e.g. int, double, ...)
+ DCHECK_EQ(jsonb_path_columns.size(), 1);
+ auto& rdata = jsonb_path_columns[0]->get_chars();
+ auto& roffsets = jsonb_path_columns[0]->get_offsets();
+ if (jsonb_data_const) {
Impl::scalar_vector(context,
jsonb_data_column->get_data_at(0), rdata, roffsets,
res->get_data(), null_map->get_data(),
is_invalid_json_path);
- } else if (col_const[1]) {
- Impl::vector_scalar(context, ldata, loffsets,
jsonb_path_column->get_data_at(0),
+ } else if (path_const[0]) {
+ Impl::vector_scalar(context, ldata, loffsets,
jsonb_path_columns[0]->get_data_at(0),
res->get_data(), null_map->get_data(),
is_invalid_json_path);
} else {
Impl::vector_vector(context, ldata, loffsets, rdata, roffsets,
res->get_data(),
null_map->get_data(),
is_invalid_json_path);
}
- }
-
- if (is_invalid_json_path) {
- return Status::InvalidArgument(
- "Json path error: {} for value: {}",
- JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
- std::string_view(reinterpret_cast<const
char*>(rdata.data()), rdata.size()));
+ if (is_invalid_json_path) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(rdata.data()),
+ rdata.size()));
+ }
}
block.get_by_position(result).column =
@@ -503,6 +508,77 @@ private:
public:
// for jsonb_extract_string
+ static Status vector_vector_v2(
+ FunctionContext* context, const ColumnString::Chars& ldata,
+ const ColumnString::Offsets& loffsets, const bool& json_data_const,
+ const std::vector<const ColumnString*>& rdata_columns, // here we
can support more paths
+ const std::vector<bool>& path_const, ColumnString::Chars& res_data,
+ ColumnString::Offsets& res_offsets, NullMap& null_map, bool&
is_invalid_json_path) {
+ size_t input_rows_count = json_data_const ? rdata_columns.size() :
loffsets.size();
+ res_offsets.resize(input_rows_count);
+
+ auto writer = std::make_unique<JsonbWriter>();
+ std::unique_ptr<JsonbToJson> formater;
+
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ if (null_map[i]) {
+ StringOP::push_null_string(i, res_data, res_offsets, null_map);
+ continue;
+ }
+ size_t l_off = loffsets[index_check_const(i, json_data_const) - 1];
+ size_t l_size = loffsets[index_check_const(i, json_data_const)] -
l_off;
+ const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
+ if (rdata_columns.size() == 1) { // just return origin value
+ const ColumnString* path_col = rdata_columns[0];
+ const ColumnString::Chars& rdata = path_col->get_chars();
+ const ColumnString::Offsets& roffsets =
path_col->get_offsets();
+ size_t r_off = roffsets[index_check_const(i, path_const[0]) -
1];
+ size_t r_size = roffsets[index_check_const(i, path_const[0])]
- r_off;
+ const char* r_raw = reinterpret_cast<const
char*>(&rdata[r_off]);
+ inner_loop_impl(i, res_data, res_offsets, null_map, writer,
formater, l_raw, l_size,
+ r_raw, r_size, is_invalid_json_path);
+ } else { // will make array string to user
+ writer->reset();
+ writer->writeStartArray();
+ for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
+ const ColumnString* path_col = rdata_columns[pi];
+ const ColumnString::Chars& rdata = path_col->get_chars();
+ const ColumnString::Offsets& roffsets =
path_col->get_offsets();
+ size_t r_off = roffsets[index_check_const(i,
path_const[pi]) - 1];
+ size_t r_size = roffsets[index_check_const(i,
path_const[pi])] - r_off;
+ const char* r_raw = reinterpret_cast<const
char*>(&rdata[r_off]);
+ // doc is NOT necessary to be deleted since JsonbDocument
will not allocate memory
+ JsonbDocument* doc = JsonbDocument::createDocument(l_raw,
l_size);
+ if (UNLIKELY(!doc || !doc->getValue())) {
+ writer->writeNull();
+ continue;
+ }
+ // value is NOT necessary to be deleted since JsonbValue
will not allocate memory
+ JsonbValue* value =
+ doc->getValue()->findPath(r_raw, r_size,
is_invalid_json_path, nullptr);
+ // if not valid json path , should return error message to
user
+ if (is_invalid_json_path) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(rdata.data()),
+ rdata.size()));
+ }
+ if (UNLIKELY(!value)) {
+ writer->writeNull();
+ } else {
+ writer->writeValue(value);
+ }
+ }
+ writer->writeEndArray();
+
StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
+
writer->getOutput()->getSize()),
+ i, res_data, res_offsets);
+ }
+ } //for
+ return Status::OK();
+ }
+
static void vector_vector(FunctionContext* context, const
ColumnString::Chars& ldata,
const ColumnString::Offsets& loffsets,
const ColumnString::Chars& rdata,
diff --git a/gensrc/script/doris_builtins_functions.py
b/gensrc/script/doris_builtins_functions.py
index ed17bf524a..bf34a7cf52 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1690,11 +1690,11 @@ visible_functions = {
[['jsonb_exists_path'], 'BOOLEAN', ['JSONB', 'VARCHAR'], ''],
[['jsonb_exists_path'], 'BOOLEAN', ['JSONB', 'STRING'], ''],
- [['jsonb_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
+ [['jsonb_type'], 'STRING', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['jsonb_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
- [['jsonb_extract'], 'JSONB', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
- [['jsonb_extract'], 'JSONB', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
+ [['jsonb_extract'], 'JSONB', ['JSONB', 'VARCHAR', '...'],
'ALWAYS_NULLABLE'],
+ [['jsonb_extract'], 'JSONB', ['JSONB', 'STRING', '...'],
'ALWAYS_NULLABLE'],
[['jsonb_extract_isnull'], 'BOOLEAN', ['JSONB', 'VARCHAR'],
'ALWAYS_NULLABLE'],
[['jsonb_extract_isnull'], 'BOOLEAN', ['JSONB', 'STRING'],
'ALWAYS_NULLABLE'],
[['jsonb_extract_bool'], 'BOOLEAN', ['JSONB', 'VARCHAR'],
'ALWAYS_NULLABLE'],
@@ -1725,8 +1725,6 @@ visible_functions = {
[['json_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
[['json_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
- [['jsonb_extract'], 'JSONB', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
- [['jsonb_extract'], 'JSONB', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
[['json_extract_isnull'], 'BOOLEAN', ['JSONB', 'VARCHAR'],
'ALWAYS_NULLABLE'],
[['json_extract_isnull'], 'BOOLEAN', ['JSONB', 'STRING'],
'ALWAYS_NULLABLE'],
[['json_extract_bool'], 'BOOLEAN', ['JSONB', 'VARCHAR'],
'ALWAYS_NULLABLE'],
diff --git a/regression-test/data/jsonb_p0/test_jsonb_load_and_function.out
b/regression-test/data/jsonb_p0/test_jsonb_load_and_function.out
index 00944828bd..e1c176273c 100644
--- a/regression-test/data/jsonb_p0/test_jsonb_load_and_function.out
+++ b/regression-test/data/jsonb_p0/test_jsonb_load_and_function.out
@@ -709,6 +709,29 @@
27 {"k1":"v1","k2":200} \N
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} \N
+-- !jsonb_extract_multipath --
+1 \N \N
+2 null [null,null,null,null]
+3 true [true,null,null,null]
+4 false [false,null,null,null]
+5 100 [100,null,null,null]
+6 10000 [10000,null,null,null]
+7 1000000000 [1000000000,null,null,null]
+8 1152921504606846976 [1152921504606846976,null,null,null]
+9 6.18 [6.18,null,null,null]
+10 "abcd" ["abcd",null,null,null]
+11 {} [{},{},null,{}]
+12 {"k1":"v31","k2":300}
[{"k1":"v31","k2":300},{"k1":"v31","k2":300},"v31",{"k1":"v31","k2":300}]
+13 [] [[],null,null,null]
+14 [123,456] [[123,456],null,null,123]
+15 ["abc","def"] [["abc","def"],null,null,"abc"]
+16 [null,true,false,100,6.18,"abc"]
[[null,true,false,100,6.18,"abc"],null,null,null]
+17 [{"k1":"v41","k2":400},1,"a",3.14]
[[{"k1":"v41","k2":400},1,"a",3.14],null,null,{"k1":"v41","k2":400}]
+18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]}
[{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]},{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]},"v31",{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]}]
+26 \N \N
+27 {"k1":"v1","k2":200}
[{"k1":"v1","k2":200},{"k1":"v1","k2":200},"v1",{"k1":"v1","k2":200}]
+28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"}
[{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"},{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"},null,{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"}]
+
-- !jsonb_extract_string_select --
1 \N \N
2 null null
diff --git
a/regression-test/data/jsonb_p0/test_jsonb_load_unique_key_and_function.out
b/regression-test/data/jsonb_p0/test_jsonb_load_unique_key_and_function.out
index 04946beaa1..632badf32e 100644
--- a/regression-test/data/jsonb_p0/test_jsonb_load_unique_key_and_function.out
+++ b/regression-test/data/jsonb_p0/test_jsonb_load_unique_key_and_function.out
@@ -709,6 +709,29 @@
27 {"k1":"v1","k2":200} \N
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} \N
+-- !jsonb_extract_multipath --
+1 \N \N
+2 null [null,null,null,null]
+3 true [true,null,null,null]
+4 false [false,null,null,null]
+5 100 [100,null,null,null]
+6 10000 [10000,null,null,null]
+7 1000000000 [1000000000,null,null,null]
+8 1152921504606846976 [1152921504606846976,null,null,null]
+9 6.18 [6.18,null,null,null]
+10 "abcd" ["abcd",null,null,null]
+11 {} [{},{},null,{}]
+12 {"k1":"v31","k2":300}
[{"k1":"v31","k2":300},{"k1":"v31","k2":300},"v31",{"k1":"v31","k2":300}]
+13 [] [[],null,null,null]
+14 [123,456] [[123,456],null,null,123]
+15 ["abc","def"] [["abc","def"],null,null,"abc"]
+16 [null,true,false,100,6.18,"abc"]
[[null,true,false,100,6.18,"abc"],null,null,null]
+17 [{"k1":"v41","k2":400},1,"a",3.14]
[[{"k1":"v41","k2":400},1,"a",3.14],null,null,{"k1":"v41","k2":400}]
+18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]}
[{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]},{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]},"v31",{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]}]
+26 \N \N
+27 {"k1":"v1","k2":200}
[{"k1":"v1","k2":200},{"k1":"v1","k2":200},"v1",{"k1":"v1","k2":200}]
+28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"}
[{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"},{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"},null,{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"}]
+
-- !jsonb_extract_string_select --
1 \N \N
2 null null
diff --git
a/regression-test/suites/jsonb_p0/test_jsonb_load_and_function.groovy
b/regression-test/suites/jsonb_p0/test_jsonb_load_and_function.groovy
index 6941f335c2..d58b991ef2 100644
--- a/regression-test/suites/jsonb_p0/test_jsonb_load_and_function.groovy
+++ b/regression-test/suites/jsonb_p0/test_jsonb_load_and_function.groovy
@@ -178,6 +178,9 @@ suite("test_jsonb_load_and_function", "p0") {
qt_select "SELECT id, j, jsonb_extract(j, '\$.a1[-1]') FROM ${testTable}
ORDER BY id"
qt_select "SELECT id, j, jsonb_extract(j, '\$.a1[-10]') FROM ${testTable}
ORDER BY id"
+ // jsonb_extract_multipath
+ qt_jsonb_extract_multipath "SELECT id, j, jsonb_extract(j, '\$', '\$.*',
'\$.k1', '\$[0]') FROM ${testTable} ORDER BY id"
+
// jsonb_extract_string
qt_jsonb_extract_string_select "SELECT id, j, jsonb_extract_string(j,
'\$') FROM ${testTable} ORDER BY id"
diff --git
a/regression-test/suites/jsonb_p0/test_jsonb_load_unique_key_and_function.groovy
b/regression-test/suites/jsonb_p0/test_jsonb_load_unique_key_and_function.groovy
index eee42e3a69..5423ad33fc 100644
---
a/regression-test/suites/jsonb_p0/test_jsonb_load_unique_key_and_function.groovy
+++
b/regression-test/suites/jsonb_p0/test_jsonb_load_unique_key_and_function.groovy
@@ -171,6 +171,10 @@ suite("test_jsonb_unique_load_and_function", "p0") {
qt_select "SELECT id, j, jsonb_extract(j, '\$.a1[-1]') FROM ${testTable}
ORDER BY id"
qt_select "SELECT id, j, jsonb_extract(j, '\$.a1[-10]') FROM ${testTable}
ORDER BY id"
+
+ // jsonb_extract_multipath
+ qt_jsonb_extract_multipath "SELECT id, j, jsonb_extract(j, '\$', '\$.*',
'\$.k1', '\$[0]') FROM ${testTable} ORDER BY id"
+
// jsonb_extract_string
qt_jsonb_extract_string_select "SELECT id, j, jsonb_extract_string(j,
'\$') FROM ${testTable} ORDER BY id"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]