This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-c108335-hive-sql
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-c108335-hive-sql by this push:
     new 37f916607de [Fix](Serde) Support hive compatible output format #49036
37f916607de is described below

commit 37f916607de97456d1593819f2d10350e2864613
Author: BePPPower <fangtie...@selectdb.com>
AuthorDate: Wed Mar 12 15:33:12 2025 +0800

    [Fix](Serde) Support hive compatible output format #49036
---
 be/src/vec/data_types/serde/data_type_array_serde.cpp    |  3 ++-
 be/src/vec/data_types/serde/data_type_map_serde.cpp      |  3 ++-
 be/src/vec/data_types/serde/data_type_number_serde.cpp   |  9 ++++++++-
 be/src/vec/data_types/serde/data_type_serde.h            | 15 +++++++++++++++
 be/src/vec/data_types/serde/data_type_struct_serde.cpp   |  3 ++-
 be/src/vec/sink/vmysql_result_writer.cpp                 | 16 ++++++++++++++++
 .../java/org/apache/doris/nereids/NereidsPlanner.java    |  1 +
 .../main/java/org/apache/doris/qe/SessionVariable.java   |  8 ++++++--
 gensrc/thrift/PaloInternalService.thrift                 |  3 ++-
 9 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp
index d654e3ae22d..ea748cdd6c4 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -331,7 +331,8 @@ Status DataTypeArraySerDe::_write_column_to_mysql(const IColumn& column,
     const auto end_arr_element = offsets[row_idx_of_col_arr];
     for (auto j = begin_arr_element; j < end_arr_element; ++j) {
         if (j != begin_arr_element) {
-            if (0 != result.push_string(", ", 2)) {
+            if (0 != result.push_string(options.mysql_collection_delim.c_str(),
+                                        options.mysql_collection_delim.size())) {
                 return Status::InternalError("pack mysql buffer failed.");
             }
         }
diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp b/be/src/vec/data_types/serde/data_type_map_serde.cpp
index 7a4921623f3..d2c311b70d9 100644
--- a/be/src/vec/data_types/serde/data_type_map_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp
@@ -419,7 +419,8 @@ Status DataTypeMapSerDe::_write_column_to_mysql(const IColumn& column,
     const auto& offsets = map_column.get_offsets();
     for (auto j = offsets[col_index - 1]; j < offsets[col_index]; ++j) {
         if (j != offsets[col_index - 1]) {
-            if (0 != result.push_string(", ", 2)) {
+            if (0 != result.push_string(options.mysql_collection_delim.c_str(),
+                                        options.mysql_collection_delim.size())) {
                 return Status::InternalError("pack mysql buffer failed.");
             }
         }
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index d58439c9dd8..69e3271715f 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -277,8 +277,15 @@ Status DataTypeNumberSerDe<T>::_write_column_to_mysql(const IColumn& column,
     int buf_ret = 0;
     auto& data = assert_cast<const ColumnType&>(column).get_data();
     const auto col_index = index_check_const(row_idx, col_const);
-    if constexpr (std::is_same_v<T, Int8> || std::is_same_v<T, UInt8>) {
+    if constexpr (std::is_same_v<T, Int8>) {
         buf_ret = result.push_tinyint(data[col_index]);
+    } else if constexpr (std::is_same_v<T, UInt8>) {
+        if (options.is_bool_value_num) {
+            buf_ret = result.push_tinyint(data[col_index]);
+        } else {
+            std::string bool_value = data[col_index] ? "true" : "false";
+            result.push_string(bool_value.c_str(), bool_value.size());
+        }
     } else if constexpr (std::is_same_v<T, Int16> || std::is_same_v<T, UInt16>) {
         buf_ret = result.push_smallint(data[col_index]);
     } else if constexpr (std::is_same_v<T, Int32> || std::is_same_v<T, UInt32>) {
diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h
index f54c4604499..a53c3dd5136 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -180,6 +180,21 @@ public:
         const char* nested_string_wrapper;
         int wrapper_len;
 
+        /**
+         * mysql_collection_delim is used to separate elements in collection, such as array, map, struct
+         * It is used to write to mysql.
+         */
+        std::string mysql_collection_delim = ", ";
+
+        /**
+         * is_bool_value_num is used to display bool value in collection, such as array, map, struct
+         * eg, if set to true, the array<true> will be:
+         *      [1]
+         *     if set to false, the array<true> will be:
+         *      [true]
+         */
+        bool is_bool_value_num = true;
+
         [[nodiscard]] char get_collection_delimiter(
                 int hive_text_complex_type_delimiter_level) const {
             CHECK(0 <= hive_text_complex_type_delimiter_level &&
diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
index ead4d0b2088..8cbfb3d360c 100644
--- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
@@ -349,7 +349,8 @@ Status DataTypeStructSerDe::_write_column_to_mysql(const IColumn& column,
     bool begin = true;
     for (size_t j = 0; j < elem_serdes_ptrs.size(); ++j) {
         if (!begin) {
-            if (0 != result.push_string(", ", 2)) {
+            if (0 != result.push_string(options.mysql_collection_delim.c_str(),
+                                        options.mysql_collection_delim.size())) {
                 return Status::InternalError("pack mysql buffer failed.");
             }
         }
diff --git a/be/src/vec/sink/vmysql_result_writer.cpp b/be/src/vec/sink/vmysql_result_writer.cpp
index ac04c6367cb..dabf79960a8 100644
--- a/be/src/vec/sink/vmysql_result_writer.cpp
+++ b/be/src/vec/sink/vmysql_result_writer.cpp
@@ -163,6 +163,8 @@ Status VMysqlResultWriter<is_binary_format>::_set_options(
         _options.map_key_delim = ':';
         _options.null_format = "null";
         _options.null_len = 4;
+        _options.mysql_collection_delim = ", ";
+        _options.is_bool_value_num = true;
         break;
     case TSerdeDialect::PRESTO:
         // eg:
@@ -173,6 +175,20 @@ Status VMysqlResultWriter<is_binary_format>::_set_options(
         _options.map_key_delim = '=';
         _options.null_format = "NULL";
         _options.null_len = 4;
+        _options.mysql_collection_delim = ", ";
+        _options.is_bool_value_num = true;
+        break;
+    case TSerdeDialect::HIVE:
+        // eg:
+        //  array: ["abc","def","",null]
+        //  map: {"k1":null,"k2":"v3"}
+        _options.nested_string_wrapper = "\"";
+        _options.wrapper_len = 1;
+        _options.map_key_delim = ':';
+        _options.null_format = "null";
+        _options.null_len = 4;
+        _options.mysql_collection_delim = ",";
+        _options.is_bool_value_num = false;
         break;
     default:
         return Status::InternalError("unknown serde dialect: {}", serde_dialect);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
index 56adf5f2f82..5f6b74a597b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
@@ -810,6 +810,7 @@ public class NereidsPlanner extends Planner {
                 statementContext.setFormatOptions(FormatOptions.getForPresto());
                 break;
             case "doris":
+            case "hive":
                 statementContext.setFormatOptions(FormatOptions.getDefault());
                 break;
             default:
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 1a502151278..04279c665d7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -4511,8 +4511,10 @@ public class SessionVariable implements Serializable, Writable {
             throw new UnsupportedOperationException("serdeDialect value is empty");
         }
 
-        if (!serdeDialect.equalsIgnoreCase("doris") && !serdeDialect.equalsIgnoreCase("presto")
-                && !serdeDialect.equalsIgnoreCase("trino")) {
+        if (!serdeDialect.equalsIgnoreCase("doris")
+                && !serdeDialect.equalsIgnoreCase("presto")
+                && !serdeDialect.equalsIgnoreCase("trino")
+                && !serdeDialect.equalsIgnoreCase("hive")) {
             LOG.warn("serdeDialect value is invalid, the invalid value is {}", serdeDialect);
             throw new UnsupportedOperationException(
                     "sqlDialect value is invalid, the invalid value is " + serdeDialect);
@@ -4685,6 +4687,8 @@ public class SessionVariable implements Serializable, Writable {
             case "presto":
             case "trino":
                 return TSerdeDialect.PRESTO;
+            case "hive":
+                return TSerdeDialect.HIVE;
             default:
                 throw new IllegalArgumentException("Unknown serde dialect: " + serdeDialect);
         }
diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift
index d2805ba83cb..0bf401122a1 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -82,7 +82,8 @@ struct TResourceLimit {
 
 enum TSerdeDialect {
   DORIS = 0,
-  PRESTO = 1
+  PRESTO = 1,
+  HIVE = 2
 }
 
 // Query options that correspond to PaloService.PaloQueryOptions,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to