This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-c108335-hive-sql in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-c108335-hive-sql by this push: new 37f916607de [Fix](Serde) Support hive compatible output format #49036 37f916607de is described below commit 37f916607de97456d1593819f2d10350e2864613 Author: BePPPower <fangtie...@selectdb.com> AuthorDate: Wed Mar 12 15:33:12 2025 +0800 [Fix](Serde) Support hive compatible output format #49036 --- be/src/vec/data_types/serde/data_type_array_serde.cpp | 3 ++- be/src/vec/data_types/serde/data_type_map_serde.cpp | 3 ++- be/src/vec/data_types/serde/data_type_number_serde.cpp | 9 ++++++++- be/src/vec/data_types/serde/data_type_serde.h | 15 +++++++++++++++ be/src/vec/data_types/serde/data_type_struct_serde.cpp | 3 ++- be/src/vec/sink/vmysql_result_writer.cpp | 16 ++++++++++++++++ .../java/org/apache/doris/nereids/NereidsPlanner.java | 1 + .../main/java/org/apache/doris/qe/SessionVariable.java | 8 ++++++-- gensrc/thrift/PaloInternalService.thrift | 3 ++- 9 files changed, 54 insertions(+), 7 deletions(-) diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp index d654e3ae22d..ea748cdd6c4 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp @@ -331,7 +331,8 @@ Status DataTypeArraySerDe::_write_column_to_mysql(const IColumn& column, const auto end_arr_element = offsets[row_idx_of_col_arr]; for (auto j = begin_arr_element; j < end_arr_element; ++j) { if (j != begin_arr_element) { - if (0 != result.push_string(", ", 2)) { + if (0 != result.push_string(options.mysql_collection_delim.c_str(), + options.mysql_collection_delim.size())) { return Status::InternalError("pack mysql buffer failed."); } } diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp b/be/src/vec/data_types/serde/data_type_map_serde.cpp index 7a4921623f3..d2c311b70d9 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp @@ -419,7 +419,8 @@ Status DataTypeMapSerDe::_write_column_to_mysql(const IColumn& column, const auto& offsets = map_column.get_offsets(); for (auto j = offsets[col_index - 1]; j < offsets[col_index]; ++j) { if (j != offsets[col_index - 1]) { - if (0 != result.push_string(", ", 2)) { + if (0 != result.push_string(options.mysql_collection_delim.c_str(), + options.mysql_collection_delim.size())) { return Status::InternalError("pack mysql buffer failed."); } } diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index d58439c9dd8..69e3271715f 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -277,8 +277,15 @@ Status DataTypeNumberSerDe<T>::_write_column_to_mysql(const IColumn& column, int buf_ret = 0; auto& data = assert_cast<const ColumnType&>(column).get_data(); const auto col_index = index_check_const(row_idx, col_const); - if constexpr (std::is_same_v<T, Int8> || std::is_same_v<T, UInt8>) { + if constexpr (std::is_same_v<T, Int8>) { buf_ret = result.push_tinyint(data[col_index]); + } else if constexpr (std::is_same_v<T, UInt8>) { + if (options.is_bool_value_num) { + buf_ret = result.push_tinyint(data[col_index]); + } else { + std::string bool_value = data[col_index] ? "true" : "false"; + result.push_string(bool_value.c_str(), bool_value.size()); + } } else if constexpr (std::is_same_v<T, Int16> || std::is_same_v<T, UInt16>) { buf_ret = result.push_smallint(data[col_index]); } else if constexpr (std::is_same_v<T, Int32> || std::is_same_v<T, UInt32>) { diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index f54c4604499..a53c3dd5136 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -180,6 +180,21 @@ public: const char* nested_string_wrapper; int wrapper_len; + /** + * mysql_collection_delim is used to separate elements in collection, such as array, map, struct + * It is used to write to mysql. + */ + std::string mysql_collection_delim = ", "; + + /** + * is_bool_value_num is used to display bool value in collection, such as array, map, struct + * eg, if set to true, the array<true> will be: + * [1] + * if set to false, the array<true> will be: + * [true] + */ + bool is_bool_value_num = true; + [[nodiscard]] char get_collection_delimiter( int hive_text_complex_type_delimiter_level) const { CHECK(0 <= hive_text_complex_type_delimiter_level && diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp b/be/src/vec/data_types/serde/data_type_struct_serde.cpp index ead4d0b2088..8cbfb3d360c 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp @@ -349,7 +349,8 @@ Status DataTypeStructSerDe::_write_column_to_mysql(const IColumn& column, bool begin = true; for (size_t j = 0; j < elem_serdes_ptrs.size(); ++j) { if (!begin) { - if (0 != result.push_string(", ", 2)) { + if (0 != result.push_string(options.mysql_collection_delim.c_str(), + options.mysql_collection_delim.size())) { return Status::InternalError("pack mysql buffer failed."); } } diff --git a/be/src/vec/sink/vmysql_result_writer.cpp b/be/src/vec/sink/vmysql_result_writer.cpp index ac04c6367cb..dabf79960a8 100644 --- a/be/src/vec/sink/vmysql_result_writer.cpp +++ b/be/src/vec/sink/vmysql_result_writer.cpp @@ -163,6 +163,8 @@ Status VMysqlResultWriter<is_binary_format>::_set_options( _options.map_key_delim = ':'; _options.null_format = "null"; _options.null_len = 4; + _options.mysql_collection_delim = ", "; + _options.is_bool_value_num = true; break; case TSerdeDialect::PRESTO: // eg: @@ -173,6 +175,20 @@ Status VMysqlResultWriter<is_binary_format>::_set_options( _options.map_key_delim = '='; _options.null_format = "NULL"; _options.null_len = 4; + _options.mysql_collection_delim = ", "; + _options.is_bool_value_num = true; + break; + case TSerdeDialect::HIVE: + // eg: + // array: ["abc","def","",null] + // map: {"k1":null,"k2":"v3"} + _options.nested_string_wrapper = "\""; + _options.wrapper_len = 1; + _options.map_key_delim = ':'; + _options.null_format = "null"; + _options.null_len = 4; + _options.mysql_collection_delim = ","; + _options.is_bool_value_num = false; break; default: return Status::InternalError("unknown serde dialect: {}", serde_dialect); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index 56adf5f2f82..5f6b74a597b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -810,6 +810,7 @@ public class NereidsPlanner extends Planner { statementContext.setFormatOptions(FormatOptions.getForPresto()); break; case "doris": + case "hive": statementContext.setFormatOptions(FormatOptions.getDefault()); break; default: diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 1a502151278..04279c665d7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -4511,8 +4511,10 @@ public class SessionVariable implements Serializable, Writable { throw new UnsupportedOperationException("serdeDialect value is empty"); } - if (!serdeDialect.equalsIgnoreCase("doris") && !serdeDialect.equalsIgnoreCase("presto") - && !serdeDialect.equalsIgnoreCase("trino")) { + if (!serdeDialect.equalsIgnoreCase("doris") + && !serdeDialect.equalsIgnoreCase("presto") + && !serdeDialect.equalsIgnoreCase("trino") + && !serdeDialect.equalsIgnoreCase("hive")) { LOG.warn("serdeDialect value is invalid, the invalid value is {}", serdeDialect); throw new UnsupportedOperationException( "sqlDialect value is invalid, the invalid value is " + serdeDialect); @@ -4685,6 +4687,8 @@ public class SessionVariable implements Serializable, Writable { case "presto": case "trino": return TSerdeDialect.PRESTO; + case "hive": + return TSerdeDialect.HIVE; default: throw new IllegalArgumentException("Unknown serde dialect: " + serdeDialect); } diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index d2805ba83cb..0bf401122a1 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -82,7 +82,8 @@ struct TResourceLimit { enum TSerdeDialect { DORIS = 0, - PRESTO = 1 + PRESTO = 1, + HIVE = 2 } // Query options that correspond to PaloService.PaloQueryOptions, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org