xiaokang commented on code in PR #21555:
URL: https://github.com/apache/doris/pull/21555#discussion_r1257406548
##########
be/src/vec/functions/function_jsonb.cpp:
##########
@@ -503,6 +508,93 @@ struct JsonbExtractStringImpl {
public:
// for jsonb_extract_string
+ static Status vector_vector_v2(
+ FunctionContext* context, const ColumnString::Chars& ldata,
+ const ColumnString::Offsets& loffsets, const bool& json_data_const,
+ const std::vector<const ColumnString*>& rdata_columns, // here we
can support more paths
+ const std::vector<bool>& path_const, ColumnString::Chars& res_data,
+ ColumnString::Offsets& res_offsets, NullMap& null_map, bool&
is_invalid_json_path) {
+ size_t input_rows_count = json_data_const ? rdata_columns.size() :
loffsets.size();
+ res_offsets.resize(input_rows_count);
+
+ std::unique_ptr<JsonbWriter> writer;
+ if constexpr (std::is_same_v<DataTypeJsonb, ReturnType>) {
+ writer.reset(new JsonbWriter());
+ }
+
+ std::unique_ptr<JsonbToJson> formater;
+
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ if (null_map[i]) {
+ StringOP::push_null_string(i, res_data, res_offsets, null_map);
+ continue;
+ }
+ size_t l_off = loffsets[index_check_const(i, json_data_const) - 1];
+ size_t l_size = loffsets[index_check_const(i, json_data_const)] -
l_off;
+ const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
+ if (rdata_columns.size() == 1) { // just return origin value
+ const ColumnString* path_col = rdata_columns[0];
+ const ColumnString::Chars& rdata = path_col->get_chars();
+ const ColumnString::Offsets& roffsets =
path_col->get_offsets();
+ size_t r_off = roffsets[index_check_const(i, path_const[0]) -
1];
+ size_t r_size = roffsets[index_check_const(i, path_const[0])]
- r_off;
+ const char* r_raw = reinterpret_cast<const
char*>(&rdata[r_off]);
+ inner_loop_impl(i, res_data, res_offsets, null_map, writer,
formater, l_raw, l_size,
+ r_raw, r_size, is_invalid_json_path);
+ } else { // will make array string to user
+ writer->reset();
+ writer->writeStartArray();
+ for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
+ const ColumnString* path_col = rdata_columns[pi];
+ const ColumnString::Chars& rdata = path_col->get_chars();
+ const ColumnString::Offsets& roffsets =
path_col->get_offsets();
+ size_t r_off = roffsets[index_check_const(i,
path_const[pi]) - 1];
+ size_t r_size = roffsets[index_check_const(i,
path_const[pi])] - r_off;
+ const char* r_raw = reinterpret_cast<const
char*>(&rdata[r_off]);
+ String path(r_raw, r_size);
+ // doc is NOT necessary to be deleted since JsonbDocument
will not allocate memory
+ JsonbDocument* doc = JsonbDocument::createDocument(l_raw,
l_size);
+ if (UNLIKELY(!doc || !doc->getValue())) {
+ writer->writeNull();
+ continue;
+ }
+ // value is NOT necessary to be deleted since JsonbValue
will not allocate memory
+ JsonbValue* value =
+ doc->getValue()->findPath(r_raw, r_size,
is_invalid_json_path, nullptr);
+ // if not valid json path , should return error message to
user
+ if (is_invalid_json_path) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(rdata.data()),
+ rdata.size()));
+ }
+ if (UNLIKELY(!value)) {
+ writer->writeNull();
+ continue;
+ }
+ writer->writeValue(value);
Review Comment:
put it in else branch
##########
be/src/vec/functions/function_jsonb.cpp:
##########
@@ -503,6 +508,93 @@ struct JsonbExtractStringImpl {
public:
// for jsonb_extract_string
+ static Status vector_vector_v2(
+ FunctionContext* context, const ColumnString::Chars& ldata,
+ const ColumnString::Offsets& loffsets, const bool& json_data_const,
+ const std::vector<const ColumnString*>& rdata_columns, // here we
can support more paths
+ const std::vector<bool>& path_const, ColumnString::Chars& res_data,
+ ColumnString::Offsets& res_offsets, NullMap& null_map, bool&
is_invalid_json_path) {
+ size_t input_rows_count = json_data_const ? rdata_columns.size() :
loffsets.size();
+ res_offsets.resize(input_rows_count);
+
+ std::unique_ptr<JsonbWriter> writer;
+ if constexpr (std::is_same_v<DataTypeJsonb, ReturnType>) {
+ writer.reset(new JsonbWriter());
+ }
+
+ std::unique_ptr<JsonbToJson> formater;
+
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ if (null_map[i]) {
+ StringOP::push_null_string(i, res_data, res_offsets, null_map);
+ continue;
+ }
+ size_t l_off = loffsets[index_check_const(i, json_data_const) - 1];
+ size_t l_size = loffsets[index_check_const(i, json_data_const)] -
l_off;
+ const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
+ if (rdata_columns.size() == 1) { // just return origin value
+ const ColumnString* path_col = rdata_columns[0];
+ const ColumnString::Chars& rdata = path_col->get_chars();
+ const ColumnString::Offsets& roffsets =
path_col->get_offsets();
+ size_t r_off = roffsets[index_check_const(i, path_const[0]) -
1];
+ size_t r_size = roffsets[index_check_const(i, path_const[0])]
- r_off;
+ const char* r_raw = reinterpret_cast<const
char*>(&rdata[r_off]);
+ inner_loop_impl(i, res_data, res_offsets, null_map, writer,
formater, l_raw, l_size,
+ r_raw, r_size, is_invalid_json_path);
+ } else { // will make array string to user
+ writer->reset();
+ writer->writeStartArray();
+ for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
+ const ColumnString* path_col = rdata_columns[pi];
+ const ColumnString::Chars& rdata = path_col->get_chars();
+ const ColumnString::Offsets& roffsets =
path_col->get_offsets();
+ size_t r_off = roffsets[index_check_const(i,
path_const[pi]) - 1];
+ size_t r_size = roffsets[index_check_const(i,
path_const[pi])] - r_off;
+ const char* r_raw = reinterpret_cast<const
char*>(&rdata[r_off]);
+ String path(r_raw, r_size);
Review Comment:
Use string_view to avoid copy if it's really used.
##########
be/src/vec/functions/function_jsonb.cpp:
##########
@@ -503,6 +508,93 @@ struct JsonbExtractStringImpl {
public:
// for jsonb_extract_string
+ static Status vector_vector_v2(
+ FunctionContext* context, const ColumnString::Chars& ldata,
+ const ColumnString::Offsets& loffsets, const bool& json_data_const,
+ const std::vector<const ColumnString*>& rdata_columns, // here we
can support more paths
+ const std::vector<bool>& path_const, ColumnString::Chars& res_data,
+ ColumnString::Offsets& res_offsets, NullMap& null_map, bool&
is_invalid_json_path) {
+ size_t input_rows_count = json_data_const ? rdata_columns.size() :
loffsets.size();
+ res_offsets.resize(input_rows_count);
+
+ std::unique_ptr<JsonbWriter> writer;
+ if constexpr (std::is_same_v<DataTypeJsonb, ReturnType>) {
+ writer.reset(new JsonbWriter());
+ }
+
+ std::unique_ptr<JsonbToJson> formater;
+
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ if (null_map[i]) {
+ StringOP::push_null_string(i, res_data, res_offsets, null_map);
+ continue;
+ }
+ size_t l_off = loffsets[index_check_const(i, json_data_const) - 1];
+ size_t l_size = loffsets[index_check_const(i, json_data_const)] -
l_off;
+ const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
+ if (rdata_columns.size() == 1) { // just return origin value
+ const ColumnString* path_col = rdata_columns[0];
+ const ColumnString::Chars& rdata = path_col->get_chars();
+ const ColumnString::Offsets& roffsets =
path_col->get_offsets();
+ size_t r_off = roffsets[index_check_const(i, path_const[0]) -
1];
+ size_t r_size = roffsets[index_check_const(i, path_const[0])]
- r_off;
+ const char* r_raw = reinterpret_cast<const
char*>(&rdata[r_off]);
+ inner_loop_impl(i, res_data, res_offsets, null_map, writer,
formater, l_raw, l_size,
+ r_raw, r_size, is_invalid_json_path);
+ } else { // will make array string to user
+ writer->reset();
+ writer->writeStartArray();
+ for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
+ const ColumnString* path_col = rdata_columns[pi];
+ const ColumnString::Chars& rdata = path_col->get_chars();
+ const ColumnString::Offsets& roffsets =
path_col->get_offsets();
+ size_t r_off = roffsets[index_check_const(i,
path_const[pi]) - 1];
+ size_t r_size = roffsets[index_check_const(i,
path_const[pi])] - r_off;
+ const char* r_raw = reinterpret_cast<const
char*>(&rdata[r_off]);
+ String path(r_raw, r_size);
+ // doc is NOT necessary to be deleted since JsonbDocument
will not allocate memory
+ JsonbDocument* doc = JsonbDocument::createDocument(l_raw,
l_size);
+ if (UNLIKELY(!doc || !doc->getValue())) {
+ writer->writeNull();
+ continue;
+ }
+ // value is NOT necessary to be deleted since JsonbValue
will not allocate memory
+ JsonbValue* value =
+ doc->getValue()->findPath(r_raw, r_size,
is_invalid_json_path, nullptr);
+ // if not valid json path , should return error message to
user
+ if (is_invalid_json_path) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(rdata.data()),
+ rdata.size()));
+ }
+ if (UNLIKELY(!value)) {
+ writer->writeNull();
+ continue;
+ }
+ writer->writeValue(value);
+ }
+ writer->writeEndArray();
+ if constexpr (std::is_same_v<DataTypeJsonb, ReturnType>) {
+
StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
+
writer->getOutput()->getSize()),
+ i, res_data, res_offsets);
+ } else {
+ // transform jsonb to json
+ if (!formater) {
+ formater.reset(new JsonbToJson());
+ }
+ StringOP::push_value_string(
+
formater->to_json_string(writer->getOutput()->getBuffer(),
Review Comment:
In this branch, writer is not initialized since ReturnType is not Jsonb. In
fact, I think format to string is not necessary, since planner will
automaticlly add cast from JSONB to String if necessary.
##########
be/src/vec/functions/function_jsonb.cpp:
##########
@@ -503,6 +508,93 @@ struct JsonbExtractStringImpl {
public:
// for jsonb_extract_string
+ static Status vector_vector_v2(
+ FunctionContext* context, const ColumnString::Chars& ldata,
+ const ColumnString::Offsets& loffsets, const bool& json_data_const,
+ const std::vector<const ColumnString*>& rdata_columns, // here we
can support more paths
+ const std::vector<bool>& path_const, ColumnString::Chars& res_data,
+ ColumnString::Offsets& res_offsets, NullMap& null_map, bool&
is_invalid_json_path) {
+ size_t input_rows_count = json_data_const ? rdata_columns.size() :
loffsets.size();
+ res_offsets.resize(input_rows_count);
+
+ std::unique_ptr<JsonbWriter> writer;
+ if constexpr (std::is_same_v<DataTypeJsonb, ReturnType>) {
+ writer.reset(new JsonbWriter());
+ }
+
+ std::unique_ptr<JsonbToJson> formater;
+
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ if (null_map[i]) {
+ StringOP::push_null_string(i, res_data, res_offsets, null_map);
+ continue;
+ }
+ size_t l_off = loffsets[index_check_const(i, json_data_const) - 1];
+ size_t l_size = loffsets[index_check_const(i, json_data_const)] -
l_off;
+ const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
+ if (rdata_columns.size() == 1) { // just return origin value
+ const ColumnString* path_col = rdata_columns[0];
+ const ColumnString::Chars& rdata = path_col->get_chars();
+ const ColumnString::Offsets& roffsets =
path_col->get_offsets();
+ size_t r_off = roffsets[index_check_const(i, path_const[0]) -
1];
+ size_t r_size = roffsets[index_check_const(i, path_const[0])]
- r_off;
+ const char* r_raw = reinterpret_cast<const
char*>(&rdata[r_off]);
+ inner_loop_impl(i, res_data, res_offsets, null_map, writer,
formater, l_raw, l_size,
+ r_raw, r_size, is_invalid_json_path);
+ } else { // will make array string to user
+ writer->reset();
+ writer->writeStartArray();
+ for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
+ const ColumnString* path_col = rdata_columns[pi];
+ const ColumnString::Chars& rdata = path_col->get_chars();
+ const ColumnString::Offsets& roffsets =
path_col->get_offsets();
+ size_t r_off = roffsets[index_check_const(i,
path_const[pi]) - 1];
+ size_t r_size = roffsets[index_check_const(i,
path_const[pi])] - r_off;
+ const char* r_raw = reinterpret_cast<const
char*>(&rdata[r_off]);
+ String path(r_raw, r_size);
Review Comment:
unused variable path
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]