github-actions[bot] commented on code in PR #24554:
URL: https://github.com/apache/doris/pull/24554#discussion_r1378436935
##########
be/src/vec/data_types/serde/data_type_serde.cpp:
##########
@@ -41,5 +42,56 @@ DataTypeSerDeSPtrs create_data_type_serdes(const
std::vector<SlotDescriptor*>& s
}
return serdes;
}
+
+void DataTypeSerDe::convert_array_to_rapidjson(const vectorized::Array& array,
+ rapidjson::Value& target,
+
rapidjson::Document::AllocatorType& allocator) {
+ for (const vectorized::Field& item : array) {
+ target.SetArray();
+ rapidjson::Value val;
+ convert_field_to_rapidjson(item, val, allocator);
+ target.PushBack(val, allocator);
+ }
+}
+
+void DataTypeSerDe::convert_field_to_rapidjson(const vectorized::Field& field,
Review Comment:
warning: method 'convert_field_to_rapidjson' can be made static
[readability-convert-member-functions-to-static]
```suggestion
static void DataTypeSerDe::convert_field_to_rapidjson(const
vectorized::Field& field,
```
##########
be/src/vec/functions/function_cast.h:
##########
@@ -1892,12 +1995,119 @@
case TypeIndex::Float64:
return &ConvertImplNumberToJsonb<ColumnFloat64>::execute;
case TypeIndex::String:
- return &ConvertImplGenericFromString::execute;
+ if (string_as_jsonb_string) {
+ // We convert column string to jsonb type just add a string
jsonb field to dst column instead of parse
+ // each line in original string column.
+ return &ConvertImplStringToJsonbAsJsonbString::execute;
+ } else {
+ return &ConvertImplGenericFromString::execute;
+ }
Review Comment:
warning: do not use 'else' after 'return' [readability-else-after-return]
```suggestion
} return &ConvertImplGenericFromString::execute;
```
##########
be/src/vec/functions/function_cast.h:
##########
@@ -1865,17 +1963,22 @@
return &ConvertImplFromJsonb<TypeIndex::Int128,
ColumnInt128>::execute;
case TypeIndex::Float64:
return &ConvertImplFromJsonb<TypeIndex::Float64,
ColumnFloat64>::execute;
+ case TypeIndex::String:
+ if (!jsonb_string_as_string) {
+ // Conversion from String through parsing.
+ return &ConvertImplGenericToString::execute2;
+ } else {
+ return ConvertImplGenericFromJsonb::execute;
+ }
Review Comment:
warning: do not use 'else' after 'return' [readability-else-after-return]
```suggestion
} return ConvertImplGenericFromJsonb::execute;
```
##########
be/src/vec/data_types/serde/data_type_string_serde.cpp:
##########
@@ -257,5 +260,26 @@ Status DataTypeStringSerDe::write_column_to_orc(const
std::string& timezone, con
return Status::OK();
}
+void DataTypeStringSerDe::write_one_cell_to_json(const IColumn& column,
rapidjson::Value& result,
Review Comment:
warning: method 'write_one_cell_to_json' can be made static
[readability-convert-member-functions-to-static]
```suggestion
static void DataTypeStringSerDe::write_one_cell_to_json(const IColumn&
column, rapidjson::Value& result,
```
be/src/vec/data_types/serde/data_type_string_serde.cpp:264:
```diff
- int row_num) const {
+ int row_num) {
```
##########
be/src/vec/functions/function_cast.h:
##########
@@ -640,10 +644,104 @@ struct ConvertImplNumberToJsonb {
}
};
+struct ConvertImplStringToJsonbAsJsonbString {
+ static Status execute(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ const size_t result, size_t input_rows_count) {
+ auto data_type_to = block.get_by_position(result).type;
+ const auto& col_with_type_and_name =
block.get_by_position(arguments[0]);
+ const IColumn& col_from = *col_with_type_and_name.column;
+ auto dst = ColumnString::create();
+ ColumnString* dst_str = assert_cast<ColumnString*>(dst.get());
+ const auto* from_string = assert_cast<const ColumnString*>(&col_from);
+ JsonbWriter writer;
+ for (size_t i = 0; i < input_rows_count; i++) {
+ auto str_ref = from_string->get_data_at(i);
+ writer.reset();
+ // write raw string to jsonb
+ writer.writeStartString();
+ writer.writeString(str_ref.data, str_ref.size);
+ writer.writeEndString();
+ dst_str->insert_data(writer.getOutput()->getBuffer(),
writer.getOutput()->getSize());
+ }
+ block.replace_by_position(result, std::move(dst));
+ return Status::OK();
+ }
+};
+
+struct ConvertImplGenericFromJsonb {
+ static Status execute(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ const size_t result, size_t input_rows_count) {
+ auto data_type_to = block.get_by_position(result).type;
+ const auto& col_with_type_and_name =
block.get_by_position(arguments[0]);
+ const IColumn& col_from = *col_with_type_and_name.column;
+ if (const ColumnString* col_from_string =
check_and_get_column<ColumnString>(&col_from)) {
+ auto col_to = data_type_to->create_column();
+
+ size_t size = col_from.size();
+ col_to->reserve(size);
+
+ ColumnUInt8::MutablePtr col_null_map_to =
ColumnUInt8::create(size);
+ ColumnUInt8::Container* vec_null_map_to =
&col_null_map_to->get_data();
+ const bool is_complex = is_complex_type(data_type_to);
+ const bool is_dst_string = is_string_or_fixed_string(data_type_to);
+ for (size_t i = 0; i < size; ++i) {
+ const auto& val = col_from_string->get_data_at(i);
+ JsonbDocument* doc = JsonbDocument::createDocument(val.data,
val.size);
+ if (UNLIKELY(!doc || !doc->getValue())) {
Review Comment:
warning: boolean expression can be simplified by DeMorgan's theorem
[readability-simplify-boolean-expr]
```cpp
if (UNLIKELY(!doc || !doc->getValue())) {
^
```
<details>
<summary>Additional context</summary>
**be/src/common/compiler_util.h:35:** expanded from macro 'UNLIKELY'
```cpp
#define UNLIKELY(expr) __builtin_expect(!!(expr), 0)
^
```
</details>
##########
be/src/vec/data_types/serde/data_type_nullable_serde.cpp:
##########
@@ -342,5 +342,30 @@ Status DataTypeNullableSerDe::write_column_to_orc(const
std::string& timezone,
const std::string DataTypeNullableSerDe::NULL_IN_CSV_FOR_ORDINARY_TYPE = "\\N";
const std::string DataTypeNullableSerDe::NULL_IN_CSV_FOR_NESTED_TYPE = "null";
+void DataTypeNullableSerDe::write_one_cell_to_json(const IColumn& column,
rapidjson::Value& result,
+
rapidjson::Document::AllocatorType& allocator,
+ int row_num) const {
+ auto& col = static_cast<const ColumnNullable&>(column);
+ auto& nested_col = col.get_nested_column();
+ if (col.is_null_at(row_num)) {
+ result.SetNull();
+ } else {
+ nested_serde->write_one_cell_to_json(nested_col, result, allocator,
row_num);
+ }
+}
+
+void DataTypeNullableSerDe::read_one_cell_from_json(IColumn& column,
+ const rapidjson::Value&
result) const {
+ auto& col = static_cast<ColumnNullable&>(column);
Review Comment:
warning: method 'read_one_cell_from_json' can be made static
[readability-convert-member-functions-to-static]
```suggestion
,static
{
```
##########
be/src/vec/data_types/serde/data_type_string_serde.cpp:
##########
@@ -257,5 +260,26 @@
return Status::OK();
}
+void DataTypeStringSerDe::write_one_cell_to_json(const IColumn& column,
rapidjson::Value& result,
+
rapidjson::Document::AllocatorType& allocator,
+ int row_num) const {
+ const auto& col = static_cast<const ColumnString&>(column);
+ const auto& data_ref = col.get_data_at(row_num);
+ result.SetString(data_ref.data, data_ref.size);
+}
+
+void DataTypeStringSerDe::read_one_cell_from_json(IColumn& column,
+ const rapidjson::Value&
result) const {
Review Comment:
warning: method 'read_one_cell_from_json' can be made static
[readability-convert-member-functions-to-static]
```suggestion
static void DataTypeStringSerDe::read_one_cell_from_json(IColumn& column,
const rapidjson::Value&
result) {
```
##########
be/src/vec/functions/function_cast.h:
##########
@@ -640,10 +644,104 @@
}
};
+struct ConvertImplStringToJsonbAsJsonbString {
+ static Status execute(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ const size_t result, size_t input_rows_count) {
+ auto data_type_to = block.get_by_position(result).type;
+ const auto& col_with_type_and_name =
block.get_by_position(arguments[0]);
+ const IColumn& col_from = *col_with_type_and_name.column;
+ auto dst = ColumnString::create();
+ ColumnString* dst_str = assert_cast<ColumnString*>(dst.get());
+ const auto* from_string = assert_cast<const ColumnString*>(&col_from);
+ JsonbWriter writer;
+ for (size_t i = 0; i < input_rows_count; i++) {
+ auto str_ref = from_string->get_data_at(i);
+ writer.reset();
+ // write raw string to jsonb
+ writer.writeStartString();
+ writer.writeString(str_ref.data, str_ref.size);
+ writer.writeEndString();
+ dst_str->insert_data(writer.getOutput()->getBuffer(),
writer.getOutput()->getSize());
+ }
+ block.replace_by_position(result, std::move(dst));
+ return Status::OK();
+ }
+};
+
+struct ConvertImplGenericFromJsonb {
+ static Status execute(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ const size_t result, size_t input_rows_count) {
+ auto data_type_to = block.get_by_position(result).type;
+ const auto& col_with_type_and_name =
block.get_by_position(arguments[0]);
+ const IColumn& col_from = *col_with_type_and_name.column;
+ if (const ColumnString* col_from_string =
check_and_get_column<ColumnString>(&col_from)) {
+ auto col_to = data_type_to->create_column();
+
+ size_t size = col_from.size();
+ col_to->reserve(size);
+
+ ColumnUInt8::MutablePtr col_null_map_to =
ColumnUInt8::create(size);
+ ColumnUInt8::Container* vec_null_map_to =
&col_null_map_to->get_data();
+ const bool is_complex = is_complex_type(data_type_to);
+ const bool is_dst_string = is_string_or_fixed_string(data_type_to);
+ for (size_t i = 0; i < size; ++i) {
+ const auto& val = col_from_string->get_data_at(i);
+ JsonbDocument* doc = JsonbDocument::createDocument(val.data,
val.size);
+ if (UNLIKELY(!doc || !doc->getValue())) {
+ (*vec_null_map_to)[i] = 1;
+ col_to->insert_default();
+ continue;
+ }
+
+ // value is NOT necessary to be deleted since JsonbValue will
not allocate memory
+ JsonbValue* value = doc->getValue();
+ if (UNLIKELY(!value)) {
+ (*vec_null_map_to)[i] = 1;
+ col_to->insert_default();
+ continue;
+ }
+ // Note: here we should handle the null element
+ if (val.size == 0) {
+ col_to->insert_default();
+ // empty string('') is an invalid format for complex type,
set null_map to 1
+ if (is_complex) {
+ (*vec_null_map_to)[i] = 1;
+ }
+ continue;
+ }
+ // add string to string column
+ if (context->jsonb_string_as_string() && is_dst_string &&
value->isString()) {
+ auto blob = static_cast<const JsonbBlobVal*>(value);
Review Comment:
warning: 'auto blob' can be declared as 'const auto *blob'
[readability-qualified-auto]
```suggestion
const auto *blob = static_cast<const
JsonbBlobVal*>(value);
```
##########
be/src/vec/olap/olap_data_convertor.h:
##########
@@ -198,6 +200,7 @@ class OlapBlockDataConvertor {
size_t num_rows) override;
const void* get_data() const override;
const void* get_data_at(size_t offset) const override;
+ Status convert_to_olap(const UInt8* null_map, const ColumnString*
column_array);
Review Comment:
warning: function
'std::doris::vectorized::OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap'
has a definition with different parameter names
[readability-inconsistent-declaration-parameter-name]
```cpp
Status convert_to_olap(const UInt8* null_map, const ColumnString*
column_array);
^
```
<details>
<summary>Additional context</summary>
**be/src/vec/olap/olap_data_convertor.cpp:604:** the definition seen here
```cpp
Status
OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap(
^
```
**be/src/vec/olap/olap_data_convertor.h:202:** differing parameters are
named here: ('column_array'), in definition: ('column_string')
```cpp
Status convert_to_olap(const UInt8* null_map, const ColumnString*
column_array);
^
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]