This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 2fdc378e1aa [cherry-pick](jsonb) fix invalid jsonb value write into
segment file which make be crash (#48731)
2fdc378e1aa is described below
commit 2fdc378e1aaff3ef6e4b390ad7de35516865500e
Author: amory <[email protected]>
AuthorDate: Thu Mar 6 21:31:47 2025 +0800
[cherry-pick](jsonb) fix invalid jsonb value write into segment file which
make be crash (#48731)
…ke select core (#48625)
Fix invalid jsonb values being written into segment files, which makes a
later select core dump. A check of the jsonb value is added in
convert_to_olap, whose output is what gets written into the segment file.
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [x] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---
be/src/util/jsonb_document.h | 4 +-
be/src/util/jsonb_utils.h | 2 +-
be/src/util/jsonb_writer.h | 3 +-
.../exprs/table_function/vexplode_json_array.cpp | 2 +-
.../exprs/table_function/vexplode_json_object.cpp | 4 +-
be/src/vec/functions/function_cast.h | 4 +-
be/src/vec/functions/function_jsonb.cpp | 17 +-
be/src/vec/jsonb/serialize.cpp | 2 +-
be/src/vec/olap/olap_data_convertor.cpp | 23 +-
be/src/vec/olap/olap_data_convertor.h | 4 +-
.../vec/data_types/common_data_type_serder_test.h | 2 +-
.../vec/data_types/serde/data_type_serde_test.cpp | 4 +-
be/test/vec/olap/jsonb_value_test.cpp | 242 +++++++++++++++++++++
13 files changed, 287 insertions(+), 26 deletions(-)
diff --git a/be/src/util/jsonb_document.h b/be/src/util/jsonb_document.h
index 8a95ccef8d9..ed778843f0d 100644
--- a/be/src/util/jsonb_document.h
+++ b/be/src/util/jsonb_document.h
@@ -177,7 +177,7 @@ public:
static JsonbDocument* makeDocument(char* pb, uint32_t size, const
JsonbValue* rval);
// create an JsonbDocument object from JSONB packed bytes
- static JsonbDocument* createDocument(const char* pb, uint32_t size);
+ static JsonbDocument* checkAndCreateDocument(const char* pb, size_t size);
// create an JsonbValue from JSONB packed bytes
static JsonbValue* createValue(const char* pb, uint32_t size);
@@ -1109,7 +1109,7 @@ inline JsonbDocument* JsonbDocument::makeDocument(char*
pb, uint32_t size, const
return doc;
}
-inline JsonbDocument* JsonbDocument::createDocument(const char* pb, uint32_t
size) {
+inline JsonbDocument* JsonbDocument::checkAndCreateDocument(const char* pb,
size_t size) {
if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
return nullptr;
}
diff --git a/be/src/util/jsonb_utils.h b/be/src/util/jsonb_utils.h
index 7dba0dca3af..c32588e2610 100644
--- a/be/src/util/jsonb_utils.h
+++ b/be/src/util/jsonb_utils.h
@@ -40,7 +40,7 @@ public:
// get json string
const std::string to_json_string(const char* data, size_t size) {
- JsonbDocument* pdoc = doris::JsonbDocument::createDocument(data, size);
+ JsonbDocument* pdoc =
doris::JsonbDocument::checkAndCreateDocument(data, size);
if (!pdoc) {
LOG(FATAL) << "invalid json binary value: " <<
std::string_view(data, size);
}
diff --git a/be/src/util/jsonb_writer.h b/be/src/util/jsonb_writer.h
index 61bd28bb783..f92d8a4096b 100644
--- a/be/src/util/jsonb_writer.h
+++ b/be/src/util/jsonb_writer.h
@@ -479,7 +479,8 @@ public:
OS_TYPE* getOutput() { return os_; }
JsonbDocument* getDocument() {
- return JsonbDocument::createDocument(getOutput()->getBuffer(),
getOutput()->getSize());
+ return JsonbDocument::checkAndCreateDocument(getOutput()->getBuffer(),
+ getOutput()->getSize());
}
JsonbValue* getValue() {
diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.cpp
b/be/src/vec/exprs/table_function/vexplode_json_array.cpp
index 3c22ef4e078..7594d9a5cc6 100644
--- a/be/src/vec/exprs/table_function/vexplode_json_array.cpp
+++ b/be/src/vec/exprs/table_function/vexplode_json_array.cpp
@@ -63,7 +63,7 @@ void
VExplodeJsonArrayTableFunction<DataImpl>::process_row(size_t row_idx) {
StringRef text = _text_column->get_data_at(row_idx);
if (text.data != nullptr) {
if (WhichDataType(_text_datatype).is_json()) {
- JsonbDocument* doc = JsonbDocument::createDocument(text.data,
text.size);
+ JsonbDocument* doc =
JsonbDocument::checkAndCreateDocument(text.data, text.size);
if (doc && doc->getValue() && doc->getValue()->isArray()) {
auto* a = (ArrayVal*)doc->getValue();
if (a->numElem() > 0) {
diff --git a/be/src/vec/exprs/table_function/vexplode_json_object.cpp
b/be/src/vec/exprs/table_function/vexplode_json_object.cpp
index 1981f48f62c..38a00d60b19 100644
--- a/be/src/vec/exprs/table_function/vexplode_json_object.cpp
+++ b/be/src/vec/exprs/table_function/vexplode_json_object.cpp
@@ -54,8 +54,8 @@ void VExplodeJsonObjectTableFunction::process_row(size_t
row_idx) {
StringRef text = _json_object_column->get_data_at(row_idx);
if (text.data != nullptr) {
- JsonbDocument* doc = JsonbDocument::createDocument(text.data,
text.size);
- if (UNLIKELY(!doc || !doc->getValue())) {
+ JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(text.data,
text.size);
+ if (!doc || !doc->getValue()) [[unlikely]] {
// error jsonb, put null into output, cur_size = 0 , we will
insert_default
return;
}
diff --git a/be/src/vec/functions/function_cast.h
b/be/src/vec/functions/function_cast.h
index 0cc2e9e2862..3db38d73d84 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -718,7 +718,7 @@ struct ConvertImplGenericFromJsonb {
const bool is_dst_string = is_string_or_fixed_string(data_type_to);
for (size_t i = 0; i < size; ++i) {
const auto& val = col_from_string->get_data_at(i);
- JsonbDocument* doc = JsonbDocument::createDocument(val.data,
val.size);
+ JsonbDocument* doc =
JsonbDocument::checkAndCreateDocument(val.data, val.size);
if (UNLIKELY(!doc || !doc->getValue())) {
(*vec_null_map_to)[i] = 1;
col_to->insert_default();
@@ -881,7 +881,7 @@ struct ConvertImplFromJsonb {
}
// doc is NOT necessary to be deleted since JsonbDocument will
not allocate memory
- JsonbDocument* doc = JsonbDocument::createDocument(val.data,
val.size);
+ JsonbDocument* doc =
JsonbDocument::checkAndCreateDocument(val.data, val.size);
if (UNLIKELY(!doc || !doc->getValue())) {
null_map[i] = 1;
res[i] = 0;
diff --git a/be/src/vec/functions/function_jsonb.cpp
b/be/src/vec/functions/function_jsonb.cpp
index 53ccec756fd..72f8a7e4a56 100644
--- a/be/src/vec/functions/function_jsonb.cpp
+++ b/be/src/vec/functions/function_jsonb.cpp
@@ -555,7 +555,7 @@ private:
continue;
}
const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
- JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size);
+ JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw,
l_size);
if (UNLIKELY(!doc || !doc->getValue())) {
dst_arr.clear();
return Status::InvalidArgument("jsonb data is invalid");
@@ -663,7 +663,7 @@ private:
static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, const
char* l_raw_str,
int l_str_size, JsonbPath& path)
{
// doc is NOT necessary to be deleted since JsonbDocument will not
allocate memory
- JsonbDocument* doc = JsonbDocument::createDocument(l_raw_str,
l_str_size);
+ JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw_str,
l_str_size);
if (UNLIKELY(!doc || !doc->getValue())) {
return;
}
@@ -758,7 +758,7 @@ private:
}
// doc is NOT necessary to be deleted since JsonbDocument will not
allocate memory
- JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size);
+ JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw,
l_size);
if (UNLIKELY(!doc || !doc->getValue())) {
StringOP::push_null_string(i, res_data, res_offsets, null_map);
return;
@@ -884,7 +884,7 @@ public:
writer->writeStartArray();
// doc is NOT necessary to be deleted since JsonbDocument will
not allocate memory
- JsonbDocument* doc = JsonbDocument::createDocument(l_raw,
l_size);
+ JsonbDocument* doc =
JsonbDocument::checkAndCreateDocument(l_raw, l_size);
for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
if (UNLIKELY(!doc || !doc->getValue())) {
@@ -1025,7 +1025,7 @@ private:
}
// doc is NOT necessary to be deleted since JsonbDocument will not
allocate memory
- JsonbDocument* doc = JsonbDocument::createDocument(l_raw_str,
l_str_size);
+ JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw_str,
l_str_size);
if (UNLIKELY(!doc || !doc->getValue())) {
null_map[i] = 1;
res[i] = 0;
@@ -1404,7 +1404,8 @@ struct JsonbLengthUtil {
}
auto jsonb_value = jsonb_data_column->get_data_at(i);
// doc is NOT necessary to be deleted since JsonbDocument will not
allocate memory
- JsonbDocument* doc =
JsonbDocument::createDocument(jsonb_value.data, jsonb_value.size);
+ JsonbDocument* doc =
+ JsonbDocument::checkAndCreateDocument(jsonb_value.data,
jsonb_value.size);
JsonbValue* value = doc->getValue()->findValue(path, nullptr);
if (UNLIKELY(!value)) {
null_map->get_data()[i] = 1;
@@ -1539,9 +1540,9 @@ struct JsonbContainsUtil {
}
// doc is NOT necessary to be deleted since JsonbDocument will not
allocate memory
JsonbDocument* doc1 =
- JsonbDocument::createDocument(jsonb_value1.data,
jsonb_value1.size);
+ JsonbDocument::checkAndCreateDocument(jsonb_value1.data,
jsonb_value1.size);
JsonbDocument* doc2 =
- JsonbDocument::createDocument(jsonb_value2.data,
jsonb_value2.size);
+ JsonbDocument::checkAndCreateDocument(jsonb_value2.data,
jsonb_value2.size);
JsonbValue* value1 = doc1->getValue()->findValue(path, nullptr);
JsonbValue* value2 = doc2->getValue();
diff --git a/be/src/vec/jsonb/serialize.cpp b/be/src/vec/jsonb/serialize.cpp
index a35d722e015..d75d332f40c 100644
--- a/be/src/vec/jsonb/serialize.cpp
+++ b/be/src/vec/jsonb/serialize.cpp
@@ -91,7 +91,7 @@ void JsonbSerializeUtil::jsonb_to_block(const
DataTypeSerDeSPtrs& serdes, const
const std::unordered_map<uint32_t,
uint32_t>& col_id_to_idx,
Block& dst, const
std::vector<std::string>& default_values,
const std::unordered_set<int>&
include_cids) {
- auto pdoc = JsonbDocument::createDocument(data, size);
+ auto pdoc = JsonbDocument::checkAndCreateDocument(data, size);
JsonbDocument& doc = *pdoc;
size_t num_rows = dst.rows();
size_t filled_columns = 0;
diff --git a/be/src/vec/olap/olap_data_convertor.cpp
b/be/src/vec/olap/olap_data_convertor.cpp
index 8a5cd6d5b65..db441b671e9 100644
--- a/be/src/vec/olap/olap_data_convertor.cpp
+++ b/be/src/vec/olap/olap_data_convertor.cpp
@@ -163,7 +163,7 @@
OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co
return
std::make_unique<OlapColumnDataConvertorDecimalV3<Decimal256>>();
}
case FieldType::OLAP_FIELD_TYPE_JSONB: {
- return std::make_unique<OlapColumnDataConvertorVarChar>(true);
+ return std::make_unique<OlapColumnDataConvertorVarChar>(true, true);
}
case FieldType::OLAP_FIELD_TYPE_BOOL: {
return
std::make_unique<OlapColumnDataConvertorSimple<vectorized::UInt8>>();
@@ -222,7 +222,10 @@
OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co
void OlapBlockDataConvertor::set_source_content(const vectorized::Block*
block, size_t row_pos,
size_t num_rows) {
DCHECK(block && num_rows > 0 && row_pos + num_rows <= block->rows() &&
- block->columns() == _convertors.size());
+ block->columns() == _convertors.size())
+ << "block=" << block->dump_structure() << ", block rows=" <<
block->rows()
+ << ", row_pos=" << row_pos << ", num_rows=" << num_rows
+ << ", convertors.size=" << _convertors.size();
size_t cid = 0;
for (const auto& typed_column : *block) {
if (typed_column.column->size() != block->rows()) {
@@ -619,8 +622,8 @@ Status
OlapBlockDataConvertor::OlapColumnDataConvertorChar::convert_to_olap() {
// class OlapBlockDataConvertor::OlapColumnDataConvertorVarChar
OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::OlapColumnDataConvertorVarChar(
- bool check_length)
- : _check_length(check_length) {}
+ bool check_length, bool is_jsonb)
+ : _check_length(check_length), _is_jsonb(is_jsonb) {}
void OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::set_source_column(
const ColumnWithTypeAndName& typed_column, size_t row_pos, size_t
num_rows) {
@@ -664,6 +667,12 @@ Status
OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap(
"Not support string len over than "
"`string_type_length_soft_limit_bytes` in vec
engine.");
}
+ // Make sure that the json binary data written in is the
correct jsonb value.
+ if (_is_jsonb &&
+ !doris::JsonbDocument::checkAndCreateDocument(slice->data,
slice->size)) {
+ return Status::InvalidArgument("invalid json binary value:
{}",
+
std::string_view(slice->data, slice->size));
+ }
} else {
// TODO: this may not be necessary, check and remove later
slice->data = nullptr;
@@ -685,6 +694,12 @@ Status
OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap(
"Not support string len over than
`string_type_length_soft_limit_bytes`"
" in vec engine.");
}
+ // Make sure that the json binary data written in is the correct
jsonb value.
+ if (_is_jsonb &&
+ !doris::JsonbDocument::checkAndCreateDocument(slice->data,
slice->size)) {
+ return Status::InvalidArgument("invalid json binary value: {}",
+ std::string_view(slice->data,
slice->size));
+ }
string_offset = *offset_cur;
++slice;
++offset_cur;
diff --git a/be/src/vec/olap/olap_data_convertor.h
b/be/src/vec/olap/olap_data_convertor.h
index 500fc7dfc4a..e778826a20c 100644
--- a/be/src/vec/olap/olap_data_convertor.h
+++ b/be/src/vec/olap/olap_data_convertor.h
@@ -203,7 +203,7 @@ private:
class OlapColumnDataConvertorVarChar : public OlapColumnDataConvertorBase {
public:
- OlapColumnDataConvertorVarChar(bool check_length);
+ OlapColumnDataConvertorVarChar(bool check_length, bool is_jsonb =
false);
~OlapColumnDataConvertorVarChar() override = default;
void set_source_column(const ColumnWithTypeAndName& typed_column,
size_t row_pos,
@@ -215,6 +215,8 @@ private:
private:
bool _check_length;
+ bool _is_jsonb =
+ false; // Make sure that the json binary data written in is
the correct jsonb value.
PaddedPODArray<Slice> _slice;
};
diff --git a/be/test/vec/data_types/common_data_type_serder_test.h
b/be/test/vec/data_types/common_data_type_serder_test.h
index a970cda7fbc..4a01436c8ef 100644
--- a/be/test/vec/data_types/common_data_type_serder_test.h
+++ b/be/test/vec/data_types/common_data_type_serder_test.h
@@ -286,7 +286,7 @@ public:
EXPECT_EQ(jsonb_column->size(), load_cols[0]->size());
for (size_t r = 0; r < jsonb_column->size(); ++r) {
StringRef jsonb_data = jsonb_column->get_data_at(r);
- auto pdoc = JsonbDocument::createDocument(jsonb_data.data,
jsonb_data.size);
+ auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data,
jsonb_data.size);
JsonbDocument& doc = *pdoc;
size_t cIdx = 0;
for (auto it = doc->begin(); it != doc->end(); ++it) {
diff --git a/be/test/vec/data_types/serde/data_type_serde_test.cpp
b/be/test/vec/data_types/serde/data_type_serde_test.cpp
index 82674b0aa44..3c9498f1d6d 100644
--- a/be/test/vec/data_types/serde/data_type_serde_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_test.cpp
@@ -240,7 +240,7 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) {
jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(),
jsonb_writer.getOutput()->getSize());
StringRef jsonb_data = jsonb_column->get_data_at(0);
- auto pdoc = JsonbDocument::createDocument(jsonb_data.data,
jsonb_data.size);
+ auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data,
jsonb_data.size);
JsonbDocument& doc = *pdoc;
for (auto it = doc->begin(); it != doc->end(); ++it) {
serde->read_one_cell_from_jsonb(*vec, it->value());
@@ -270,7 +270,7 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) {
jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(),
jsonb_writer.getOutput()->getSize());
StringRef jsonb_data = jsonb_column->get_data_at(0);
- auto pdoc = JsonbDocument::createDocument(jsonb_data.data,
jsonb_data.size);
+ auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data,
jsonb_data.size);
JsonbDocument& doc = *pdoc;
for (auto it = doc->begin(); it != doc->end(); ++it) {
serde->read_one_cell_from_jsonb(*vec, it->value());
diff --git a/be/test/vec/olap/jsonb_value_test.cpp
b/be/test/vec/olap/jsonb_value_test.cpp
new file mode 100644
index 00000000000..3111163c0be
--- /dev/null
+++ b/be/test/vec/olap/jsonb_value_test.cpp
@@ -0,0 +1,242 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest-message.h>
+#include <gtest/gtest-test-part.h>
+
+#include <string>
+
+#include "gtest/gtest_pred_impl.h"
+#include "vec/columns/column_string.h"
+#include "vec/common/string_ref.h"
+#include "vec/core/columns_with_type_and_name.h"
+#include "vec/data_types/serde/data_type_serde.h"
+#include "vec/olap/olap_data_convertor.h"
+
+namespace doris::vectorized {
+
+TEST(JsonbValueConvertorTest, JsonbValueValid) { // valid JSONB rows (plain and nullable columns) must convert without error
+ // 1. create jsonb column with serde: each case deserializes JSON text into one new row
+ auto input = ColumnString::create();
+ auto dataTypeJsonb = std::make_shared<DataTypeJsonb>();
+ auto serde = dataTypeJsonb->get_serde();
+ vectorized::DataTypeSerDe::FormatOptions options;
+
+ // Test case 1: object with a string value
+ std::string str1 = "{\"key1\": \"value1\"}";
+ Slice slice1 = Slice(str1.data(), str1.length());
+ auto st1 = serde->deserialize_one_cell_from_json(*input, slice1, options);
+ ASSERT_TRUE(st1.ok());
+ ASSERT_EQ(input->size(), 1);
+
+ // Test case 2: object with an integer value
+ std::string str2 = "{\"key2\": 12345}";
+ Slice slice2 = Slice(str2.data(), str2.length());
+ auto st2 = serde->deserialize_one_cell_from_json(*input, slice2, options);
+ ASSERT_TRUE(st2.ok());
+ ASSERT_EQ(input->size(), 2);
+
+ // Test case 3: object with a boolean value
+ std::string str3 = "{\"key3\": true}";
+ Slice slice3 = Slice(str3.data(), str3.length());
+ auto st3 = serde->deserialize_one_cell_from_json(*input, slice3, options);
+ ASSERT_TRUE(st3.ok());
+ ASSERT_EQ(input->size(), 3);
+
+ // Test case 4: object with an array value
+ std::string str4 = "{\"key4\": [1, 2, 3]}";
+ Slice slice4 = Slice(str4.data(), str4.length());
+ auto st4 = serde->deserialize_one_cell_from_json(*input, slice4, options);
+ ASSERT_TRUE(st4.ok());
+ ASSERT_EQ(input->size(), 4);
+
+ // Test case 5: object with a nested object value
+ std::string str5 = "{\"key5\": {\"subkey\": \"subvalue\"}}";
+ Slice slice5 = Slice(str5.data(), str5.length());
+ auto st5 = serde->deserialize_one_cell_from_json(*input, slice5, options);
+ ASSERT_TRUE(st5.ok());
+ ASSERT_EQ(input->size(), 5);
+
+ // 2. put column into block (the block holds only this one column)
+ vectorized::ColumnWithTypeAndName argument(input->assume_mutable(),
dataTypeJsonb,
+ "jsonb_column");
+ Block block;
+ block.insert(argument);
+
+ // 3. use OlapColumnDataConvertorVarchar::convert_to_olap to convert
column data to segment file data
+ auto _olap_data_convertor = std::make_unique<OlapBlockDataConvertor>();
+ TabletColumn jsonb_column = TabletColumn();
+ jsonb_column.set_type(FieldType::OLAP_FIELD_TYPE_JSONB);
+ _olap_data_convertor->add_column_data_convertor(jsonb_column);
+ _olap_data_convertor->set_source_content(&block, 0, 5); // row_pos = 0, num_rows = 5: all five rows
+ auto [status, column] = _olap_data_convertor->convert_column_data(0);
+ ASSERT_TRUE(status.ok());
+ ASSERT_NE(column, nullptr);
+
+ // test with null map: same flow, but with a nullable JSONB column that also holds NULL rows
+ auto nullable_col = ColumnNullable::create(ColumnString::create(),
ColumnUInt8::create());
+ auto nullable_dataTypeJsonb =
make_nullable(std::make_shared<DataTypeJsonb>());
+ auto serde1 = nullable_dataTypeJsonb->get_serde();
+
+ auto st = serde1->deserialize_one_cell_from_json(*nullable_col, slice1,
options);
+ ASSERT_TRUE(st.ok());
+ ASSERT_EQ(1, nullable_col->size());
+
+ // insert null (via the column's default value)
+ nullable_col->insert_default();
+ ASSERT_EQ(2, nullable_col->size());
+
+ st = serde1->deserialize_one_cell_from_json(*nullable_col, slice2,
options);
+ ASSERT_TRUE(st.ok());
+ ASSERT_EQ(3, nullable_col->size());
+
+ // deserialize null (from the literal text "NULL")
+ Slice slice_null = "NULL";
+ st = serde1->deserialize_one_cell_from_json(*nullable_col, slice_null,
options);
+ ASSERT_TRUE(st.ok());
+ ASSERT_EQ(4, nullable_col->size());
+
+ st = serde1->deserialize_one_cell_from_json(*nullable_col, slice3,
options);
+ ASSERT_TRUE(st.ok());
+ ASSERT_EQ(5, nullable_col->size());
+
+ // 2. put column into block (replaces the previous block content)
+ vectorized::ColumnWithTypeAndName argument1(nullable_col->assume_mutable(),
+ nullable_dataTypeJsonb,
"jsonb_column_null");
+ block.clear();
+ block.insert(argument1);
+
+ // 3. use OlapColumnDataConvertorVarchar::convert_to_olap to convert
column data to segment file data
+ _olap_data_convertor->reset();
+ _olap_data_convertor->add_column_data_convertor(jsonb_column);
+ _olap_data_convertor->set_source_content(&block, 0, 5);
+ auto [status1, column1] = _olap_data_convertor->convert_column_data(0);
+ ASSERT_TRUE(status1.ok()) << status1.to_string(); // print the error text if conversion fails
+ ASSERT_NE(column1, nullptr);
+}
+
+TEST(JsonbValueConvertorTest, JsonbValueInvalid) { // a corrupted JSONB buffer must make conversion report "invalid json binary value"
+ // 1. create jsonb column with serde: each case deserializes JSON text into one new row
+ auto input = ColumnString::create();
+ auto dataTypeJsonb = std::make_shared<DataTypeJsonb>();
+ auto serde = dataTypeJsonb->get_serde();
+ vectorized::DataTypeSerDe::FormatOptions options;
+
+ // Test case 1: object with a string value
+ std::string str1 = "{\"key1\": \"value1\"}";
+ Slice slice1 = Slice(str1.data(), str1.length());
+ auto st1 = serde->deserialize_one_cell_from_json(*input, slice1, options);
+ ASSERT_TRUE(st1.ok());
+ ASSERT_EQ(input->size(), 1);
+
+ // Test case 2: object with an integer value
+ std::string str2 = "{\"key2\": 12345}";
+ Slice slice2 = Slice(str2.data(), str2.length());
+ auto st2 = serde->deserialize_one_cell_from_json(*input, slice2, options);
+ ASSERT_TRUE(st2.ok());
+ ASSERT_EQ(input->size(), 2);
+
+ // Test case 3: object with a boolean value
+ std::string str3 = "{\"key3\": true}";
+ Slice slice3 = Slice(str3.data(), str3.length());
+ auto st3 = serde->deserialize_one_cell_from_json(*input, slice3, options);
+ ASSERT_TRUE(st3.ok());
+ ASSERT_EQ(input->size(), 3);
+
+ // Test case 4: object with an array value
+ std::string str4 = "{\"key4\": [1, 2, 3]}";
+ Slice slice4 = Slice(str4.data(), str4.length());
+ auto st4 = serde->deserialize_one_cell_from_json(*input, slice4, options);
+ ASSERT_TRUE(st4.ok());
+ ASSERT_EQ(input->size(), 4);
+ // invalid jsonb data: append one stray byte to the raw char buffer without adding a row; presumably it becomes the leading byte of the next inserted row (test case 5) - TODO confirm
+ auto& data = input->get_chars();
+ data.emplace_back('s');
+
+ // Test case 5: object with a nested object value, inserted after the stray byte
+ std::string str5 = "{\"key5\": {\"subkey\": \"subvalue\"}}";
+ Slice slice5 = Slice(str5.data(), str5.length());
+ auto st5 = serde->deserialize_one_cell_from_json(*input, slice5, options);
+ ASSERT_TRUE(st5.ok());
+ ASSERT_EQ(input->size(), 5);
+
+ // 2. put column into block (the block holds only this one column)
+ vectorized::ColumnWithTypeAndName argument(input->assume_mutable(),
dataTypeJsonb,
+ "jsonb_column");
+ Block block;
+ block.insert(argument);
+
+ // 3. use OlapColumnDataConvertorVarchar::convert_to_olap to convert
column data to segment file data
+ auto _olap_data_convertor = std::make_unique<OlapBlockDataConvertor>();
+ TabletColumn jsonb_column = TabletColumn();
+ jsonb_column.set_type(FieldType::OLAP_FIELD_TYPE_JSONB);
+ _olap_data_convertor->add_column_data_convertor(jsonb_column);
+ _olap_data_convertor->set_source_content(&block, 0, 5); // row_pos = 0, num_rows = 5: all five rows
+ auto [status, column] = _olap_data_convertor->convert_column_data(0);
+ // the invalid data must make the conversion fail with the JSONB validation message
+ ASSERT_FALSE(status.ok());
+ ASSERT_TRUE(status.to_string().find("invalid json binary value") !=
std::string::npos);
+ ASSERT_NE(column, nullptr);
+
+ // test with null map: same flow, but with a nullable JSONB column that also holds NULL rows
+ auto nullable_col = ColumnNullable::create(ColumnString::create(),
ColumnUInt8::create());
+ auto nullable_dataTypeJsonb =
make_nullable(std::make_shared<DataTypeJsonb>());
+ auto serde1 = nullable_dataTypeJsonb->get_serde();
+
+ auto st = serde1->deserialize_one_cell_from_json(*nullable_col, slice1,
options);
+ ASSERT_TRUE(st.ok());
+ ASSERT_EQ(1, nullable_col->size());
+
+ // insert null (via the column's default value)
+ nullable_col->insert_default();
+ ASSERT_EQ(2, nullable_col->size());
+
+ st = serde1->deserialize_one_cell_from_json(*nullable_col, slice2,
options);
+ ASSERT_TRUE(st.ok());
+ ASSERT_EQ(3, nullable_col->size());
+ // invalid jsonb data: stray byte appended to the nested string column's chars. NOTE(review): the next row inserted is a NULL, and convert_to_olap appears to skip validation for null rows - confirm the byte ends up in a non-null row
+ auto string_data =
assert_cast<ColumnString*>(nullable_col->get_nested_column_ptr().get());
+ auto& dat = string_data->get_chars();
+ dat.emplace_back('s');
+
+ // deserialize null (from the literal text "NULL")
+ Slice slice_null = "NULL";
+ st = serde1->deserialize_one_cell_from_json(*nullable_col, slice_null,
options);
+ ASSERT_TRUE(st.ok());
+ ASSERT_EQ(4, nullable_col->size());
+
+ st = serde1->deserialize_one_cell_from_json(*nullable_col, slice3,
options);
+ ASSERT_TRUE(st.ok());
+ ASSERT_EQ(5, nullable_col->size());
+
+ // 2. put column into block (replaces the previous block content)
+ vectorized::ColumnWithTypeAndName argument1(nullable_col->assume_mutable(),
+ nullable_dataTypeJsonb,
"jsonb_column_null");
+ block.clear();
+ block.insert(argument1);
+
+ // 3. use OlapColumnDataConvertorVarchar::convert_to_olap to convert
column data to segment file data
+ _olap_data_convertor->reset();
+ _olap_data_convertor->add_column_data_convertor(jsonb_column);
+ _olap_data_convertor->set_source_content(&block, 0, 5);
+ auto [status1, column1] = _olap_data_convertor->convert_column_data(0); // NOTE(review): status1/column1 are never inspected below
+ ASSERT_FALSE(status.ok()); // NOTE(review): re-checks `status` from the first conversion, not `status1` - looks like a copy-paste slip; confirm intent
+ ASSERT_TRUE(status.to_string().find("invalid json binary value") !=
std::string::npos);
+ ASSERT_NE(column, nullptr); // NOTE(review): likewise checks `column`, not `column1`
+}
+
+} // namespace doris::vectorized
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]