This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new b9d447741e8 Revert "branch-2.1: [Fix](Serde) Support hive compatible output format #49036" (#49986)
b9d447741e8 is described below
commit b9d447741e8fe3b050673491f77042bfb1611d46
Author: Mingyu Chen (Rayner) <[email protected]>
AuthorDate: Fri Apr 11 01:08:02 2025 -0700
Revert "branch-2.1: [Fix](Serde) Support hive compatible output format #49036" (#49986)
Reverts apache/doris#49831
---
.../vec/data_types/serde/data_type_array_serde.cpp | 5 +-
.../vec/data_types/serde/data_type_map_serde.cpp | 7 +-
.../data_types/serde/data_type_number_serde.cpp | 9 +-
be/src/vec/data_types/serde/data_type_serde.h | 20 ----
.../data_types/serde/data_type_struct_serde.cpp | 5 +-
be/src/vec/sink/vmysql_result_writer.cpp | 16 ---
.../org/apache/doris/nereids/NereidsPlanner.java | 1 -
.../java/org/apache/doris/qe/SessionVariable.java | 10 +-
gensrc/thrift/PaloInternalService.thrift | 3 +-
.../datatype_p0/serde/test_serde_dialect_hive.out | Bin 2029 -> 0 bytes
.../serde/test_serde_dialect_hive.groovy | 107 ---------------------
11 files changed, 8 insertions(+), 175 deletions(-)
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp
b/be/src/vec/data_types/serde/data_type_array_serde.cpp
index e5fc7461e45..872dd84d8c7 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -336,8 +336,7 @@ Status DataTypeArraySerDe::_write_column_to_mysql(const
IColumn& column,
const auto end_arr_element = offsets[row_idx_of_col_arr];
for (int j = begin_arr_element; j < end_arr_element; ++j) {
if (j != begin_arr_element) {
- if (0 != result.push_string(options.mysql_collection_delim.c_str(),
-
options.mysql_collection_delim.size())) {
+ if (0 != result.push_string(", ", 2)) {
return Status::InternalError("pack mysql buffer failed.");
}
}
@@ -346,7 +345,6 @@ Status DataTypeArraySerDe::_write_column_to_mysql(const
IColumn& column,
return Status::InternalError("pack mysql buffer failed.");
}
} else {
- ++options.level;
if (is_nested_string && options.wrapper_len > 0) {
if (0 != result.push_string(options.nested_string_wrapper,
options.wrapper_len)) {
return Status::InternalError("pack mysql buffer failed.");
@@ -360,7 +358,6 @@ Status DataTypeArraySerDe::_write_column_to_mysql(const
IColumn& column,
RETURN_IF_ERROR(
nested_serde->write_column_to_mysql(data, result, j,
false, options));
}
- --options.level;
}
}
if (0 != result.push_string("]", 1)) {
diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp
b/be/src/vec/data_types/serde/data_type_map_serde.cpp
index bf018ce3a80..2140885942d 100644
--- a/be/src/vec/data_types/serde/data_type_map_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp
@@ -418,8 +418,7 @@ Status DataTypeMapSerDe::_write_column_to_mysql(const
IColumn& column,
auto& offsets = map_column.get_offsets();
for (auto j = offsets[col_index - 1]; j < offsets[col_index]; ++j) {
if (j != offsets[col_index - 1]) {
- if (0 != result.push_string(options.mysql_collection_delim.c_str(),
-
options.mysql_collection_delim.size())) {
+ if (0 != result.push_string(", ", 2)) {
return Status::InternalError("pack mysql buffer failed.");
}
}
@@ -428,7 +427,6 @@ Status DataTypeMapSerDe::_write_column_to_mysql(const
IColumn& column,
return Status::InternalError("pack mysql buffer failed.");
}
} else {
- ++options.level;
if (is_key_string && options.wrapper_len > 0) {
if (0 != result.push_string(options.nested_string_wrapper,
options.wrapper_len)) {
return Status::InternalError("pack mysql buffer failed.");
@@ -442,7 +440,6 @@ Status DataTypeMapSerDe::_write_column_to_mysql(const
IColumn& column,
RETURN_IF_ERROR(key_serde->write_column_to_mysql(nested_keys_column, result, j,
false,
options));
}
- --options.level;
}
if (0 != result.push_string(&options.map_key_delim, 1)) {
return Status::InternalError("pack mysql buffer failed.");
@@ -452,7 +449,6 @@ Status DataTypeMapSerDe::_write_column_to_mysql(const
IColumn& column,
return Status::InternalError("pack mysql buffer failed.");
}
} else {
- ++options.level;
if (is_val_string && options.wrapper_len > 0) {
if (0 != result.push_string(options.nested_string_wrapper,
options.wrapper_len)) {
return Status::InternalError("pack mysql buffer failed.");
@@ -466,7 +462,6 @@ Status DataTypeMapSerDe::_write_column_to_mysql(const
IColumn& column,
RETURN_IF_ERROR(value_serde->write_column_to_mysql(nested_values_column,
result, j,
false,
options));
}
- --options.level;
}
}
if (0 != result.push_string("}", 1)) {
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp
b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index cd8b3d567e9..522cf02c75f 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -277,15 +277,8 @@ Status
DataTypeNumberSerDe<T>::_write_column_to_mysql(const IColumn& column,
int buf_ret = 0;
auto& data = assert_cast<const ColumnType&>(column).get_data();
const auto col_index = index_check_const(row_idx, col_const);
- if constexpr (std::is_same_v<T, Int8>) {
+ if constexpr (std::is_same_v<T, Int8> || std::is_same_v<T, UInt8>) {
buf_ret = result.push_tinyint(data[col_index]);
- } else if constexpr (std::is_same_v<T, UInt8>) {
- if (options.level > 0 && !options.is_bool_value_num) {
- std::string bool_value = data[col_index] ? "true" : "false";
- result.push_string(bool_value.c_str(), bool_value.size());
- } else {
- buf_ret = result.push_tinyint(data[col_index]);
- }
} else if constexpr (std::is_same_v<T, Int16> || std::is_same_v<T,
UInt16>) {
buf_ret = result.push_smallint(data[col_index]);
} else if constexpr (std::is_same_v<T, Int32> || std::is_same_v<T,
UInt32>) {
diff --git a/be/src/vec/data_types/serde/data_type_serde.h
b/be/src/vec/data_types/serde/data_type_serde.h
index b23a6a21501..7dedf30ac32 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -165,26 +165,6 @@ public:
const char* nested_string_wrapper;
int wrapper_len;
- /**
- * mysql_collection_delim is used to separate elements in collection,
such as array, map, struct
- * It is used to write to mysql.
- */
- std::string mysql_collection_delim = ", ";
-
- /**
- * is_bool_value_num is used to display bool value in collection, such
as array, map, struct
- * eg, if set to true, the array<true> will be:
- * [1]
- * if set to false, the array<true> will be:
- * [true]
- */
- bool is_bool_value_num = true;
-
- /**
- * Indicate the nested level of column. It is used to control some
behavior of serde
- */
- mutable int level = 0;
-
[[nodiscard]] char get_collection_delimiter(
int hive_text_complex_type_delimiter_level) const {
CHECK(0 <= hive_text_complex_type_delimiter_level &&
diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
index d95682e604c..d48f42e2227 100644
--- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
@@ -348,8 +348,7 @@ Status DataTypeStructSerDe::_write_column_to_mysql(const
IColumn& column,
bool begin = true;
for (size_t j = 0; j < elem_serdes_ptrs.size(); ++j) {
if (!begin) {
- if (0 != result.push_string(options.mysql_collection_delim.c_str(),
-
options.mysql_collection_delim.size())) {
+ if (0 != result.push_string(", ", 2)) {
return Status::InternalError("pack mysql buffer failed.");
}
}
@@ -373,7 +372,6 @@ Status DataTypeStructSerDe::_write_column_to_mysql(const
IColumn& column,
return Status::InternalError("pack mysql buffer failed.");
}
} else {
- ++options.level;
if (remove_nullable(col.get_column_ptr(j))->is_column_string() &&
options.wrapper_len > 0) {
if (0 != result.push_string(options.nested_string_wrapper,
options.wrapper_len)) {
@@ -388,7 +386,6 @@ Status DataTypeStructSerDe::_write_column_to_mysql(const
IColumn& column,
RETURN_IF_ERROR(elem_serdes_ptrs[j]->write_column_to_mysql(
col.get_column(j), result, col_index, false, options));
}
- --options.level;
}
begin = false;
}
diff --git a/be/src/vec/sink/vmysql_result_writer.cpp
b/be/src/vec/sink/vmysql_result_writer.cpp
index 8ad1c276025..0cdf1b34034 100644
--- a/be/src/vec/sink/vmysql_result_writer.cpp
+++ b/be/src/vec/sink/vmysql_result_writer.cpp
@@ -123,8 +123,6 @@ Status VMysqlResultWriter<is_binary_format>::_set_options(
_options.map_key_delim = ':';
_options.null_format = "null";
_options.null_len = 4;
- _options.mysql_collection_delim = ", ";
- _options.is_bool_value_num = true;
break;
case TSerdeDialect::PRESTO:
// eg:
@@ -135,20 +133,6 @@ Status VMysqlResultWriter<is_binary_format>::_set_options(
_options.map_key_delim = '=';
_options.null_format = "NULL";
_options.null_len = 4;
- _options.mysql_collection_delim = ", ";
- _options.is_bool_value_num = true;
- break;
- case TSerdeDialect::HIVE:
- // eg:
- // array: ["abc","def","",null]
- // map: {"k1":null,"k2":"v3"}
- _options.nested_string_wrapper = "\"";
- _options.wrapper_len = 1;
- _options.map_key_delim = ':';
- _options.null_format = "null";
- _options.null_len = 4;
- _options.mysql_collection_delim = ",";
- _options.is_bool_value_num = false;
break;
default:
return Status::InternalError("unknown serde dialect: {}",
serde_dialect);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
index 57d88de7a4e..89a9d220be3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
@@ -677,7 +677,6 @@ public class NereidsPlanner extends Planner {
statementContext.setFormatOptions(FormatOptions.getForPresto());
break;
case "doris":
- case "hive":
statementContext.setFormatOptions(FormatOptions.getDefault());
break;
default:
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 7d9b8416618..3f874077f51 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -4348,11 +4348,9 @@ public class SessionVariable implements Serializable,
Writable {
throw new UnsupportedOperationException("serdeDialect value is
empty");
}
- if (!serdeDialect.equalsIgnoreCase("doris")
- && !serdeDialect.equalsIgnoreCase("presto")
- && !serdeDialect.equalsIgnoreCase("trino")
- && !serdeDialect.equalsIgnoreCase("hive")) {
- LOG.warn("serde dialect value is invalid, the invalid value is
{}", serdeDialect);
+ if (!serdeDialect.equalsIgnoreCase("doris") &&
!serdeDialect.equalsIgnoreCase("presto")
+ && !serdeDialect.equalsIgnoreCase("trino")) {
+ LOG.warn("serdeDialect value is invalid, the invalid value is {}",
serdeDialect);
throw new UnsupportedOperationException(
"sqlDialect value is invalid, the invalid value is " +
serdeDialect);
}
@@ -4514,8 +4512,6 @@ public class SessionVariable implements Serializable,
Writable {
case "presto":
case "trino":
return TSerdeDialect.PRESTO;
- case "hive":
- return TSerdeDialect.HIVE;
default:
throw new IllegalArgumentException("Unknown serde dialect: " +
serdeDialect);
}
diff --git a/gensrc/thrift/PaloInternalService.thrift
b/gensrc/thrift/PaloInternalService.thrift
index ac43d3a3dee..c612826836e 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -83,8 +83,7 @@ struct TResourceLimit {
enum TSerdeDialect {
DORIS = 0,
- PRESTO = 1,
- HIVE = 2
+ PRESTO = 1
}
// Query options that correspond to PaloService.PaloQueryOptions,
diff --git a/regression-test/data/datatype_p0/serde/test_serde_dialect_hive.out
b/regression-test/data/datatype_p0/serde/test_serde_dialect_hive.out
deleted file mode 100644
index 3ea1043cdf6..00000000000
Binary files
a/regression-test/data/datatype_p0/serde/test_serde_dialect_hive.out and
/dev/null differ
diff --git
a/regression-test/suites/datatype_p0/serde/test_serde_dialect_hive.groovy
b/regression-test/suites/datatype_p0/serde/test_serde_dialect_hive.groovy
deleted file mode 100644
index b8e3037d770..00000000000
--- a/regression-test/suites/datatype_p0/serde/test_serde_dialect_hive.groovy
+++ /dev/null
@@ -1,107 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-suite("test_serde_dialect_hive", "p0") {
-
- sql """create database if not exists test_serde_dialect_hive;"""
- sql """use test_serde_dialect_hive;"""
- sql """drop table if exists test_serde_dialect_hive_tbl"""
- sql """
- create table if not exists test_serde_dialect_hive_tbl (
- c1 tinyint,
- c2 smallint,
- c3 int,
- c4 bigint,
- c5 largeint,
- c6 float,
- c7 double,
- c8 decimal(27, 9),
- c9 date,
- c10 datetime,
- c11 datetime(6),
- c12 ipv4,
- c13 ipv6,
- c14 string,
- c15 char(6),
- c16 varchar(1024),
- c17 boolean,
- c18 json,
- c19 array<int>,
- c20 array<double>,
- c21 array<decimal(10, 5)>,
- c22 array<string>,
- c23 array<map<string, string>>,
- c24 array<array<string>>,
- c25 array<struct<s_id:int(11), s_name:string, s_address:string>>,
- c26 array<struct<s_id:struct<k1:string, k2:decimal(10,2)>,
s_name:array<ipv4>, s_address:map<string, ipv6>>>,
- c27 map<string, string>,
- c28 map<string, array<array<string>>>,
- c29 map<int, map<string, array<array<string>>>>,
- c30 map<decimal(5, 3), array<struct<s_id:struct<k1:string,
k2:decimal(10,2)>, s_name:array<string>, s_address:map<string, string>>>>,
- c31 struct<s_id:int(11), s_name:string, s_address:string>,
- c32 struct<s_id:int(11), s_name:array<string>, s_address:string>,
- c33 array<date>,
- c34 array<datetime(3)>,
- c35 array<boolean>,
- c36 struct<s_id:int(11), s_name:string, s_gender:boolean>,
- c37 map<string, boolean>
- )
- distributed by random buckets 1
- properties("replication_num" = "1");
- """
-
- sql """
- insert into test_serde_dialect_hive_tbl
- (c1, c2,c3,
c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c27,c28,c29,c31,c32,c33,c34,c35,c36,c37)
- values(
- 1,2,3,4,5,1.1,2.0000,123456.123456789,"2024-06-30", "2024-06-30
10:10:11", "2024-06-30 10:10:11.123456",
- '59.50.185.152',
- 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff',
- 'this is a string with , and "',
- 'abc ef',
- ' 123ndedwdw',
- true,
- '[1, 2, 3, 4, 5]',
- [1,2,3,null,5],
- [1.1,2.1,3.1,null,5.00],
- [1.1,2.1,3.00000,null,5.12345],
- ['abc', 'de, f"', null, ''],
- [{'k1': 'v1', 'k2': null, 'k3':'', 'k4':'a , "a'}, {'k1': 'v1', 'k2':
null, 'k3 , "abc':'', 'k4':'a , "a'}],
- [['abc', 'de, f"', null, ''],[],null],
- {'k1': 'v1', 'k2': null, 'k3':'', 'k4':'a , "a'},
- {'k1': [['abc', 'de, f"', null, ''],[],null], 'k2': null},
- {10: {'k1': [['abc', 'de, f"', null, ''],[],null]}, 11: null},
- named_struct('s_id', 100, 's_name', 'abc , "', 's_address', null),
- named_struct('s_id', null, 's_name', ['abc', 'de, f"', null, ''],
's_address', ''),
- ['2024-06-01',null,'2024-06-03'],
- ['2024-06-01 10:10:10',null,'2024-06-03 01:11:23.123'],
- [true, true, false, false, true, false, false],
- named_struct('s_id', 100, 's_name', 'abc , "', 's_gender', true),
- {'k1': false, 'k2': true, 'k3':true, 'k4': false}
- );
- """
-
- sql """set serde_dialect="doris";"""
- qt_sql01 """select * from test_serde_dialect_hive_tbl"""
- sql """set serde_dialect="hive";"""
- qt_sql01 """select * from test_serde_dialect_hive_tbl"""
-
- test {
- sql """set serde_dialect="invalid""""
- exception "sqlDialect value is invalid"
- }
-}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]