This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 27bb79b33bc branch-3.1: [feat](catalog) Support reading Hive table with MultiDelimitSerDe #51936 (#52772)
27bb79b33bc is described below

commit 27bb79b33bcdff6e6ed857c43168ff3c762f07b7
Author: Mingyu Chen (Rayner) <morning...@163.com>
AuthorDate: Sat Jul 5 20:24:29 2025 +0800

    branch-3.1: [feat](catalog) Support reading Hive table with MultiDelimitSerDe #51936 (#52772)

    bp #51936

    Co-authored-by: lw112 <131352377+felixw...@users.noreply.github.com>
---
 be/src/vec/exec/format/text/text_reader.cpp        | 56 ++++++++++++
 be/src/vec/exec/format/text/text_reader.h          |  3 +
 .../format/text/hive_text_field_splitter_test.cpp  | 97 +++++++++++++++++++++
 .../multi_delimit_serde/create_table.hql           | 78 +++++++++++++++++
 .../datasource/hive/HiveMetaStoreClientHelper.java |  1 +
 .../doris/datasource/hive/HiveProperties.java      |  9 +-
 .../doris/datasource/hive/source/HiveScanNode.java | 12 ++-
 .../org/apache/doris/planner/HiveTableSink.java    | 10 ++-
 .../hive/test_multi_delimit_serde.out              | Bin 0 -> 484 bytes
 .../hive/test_multi_delimit_serde.groovy           | 84 ++++++++++++++++++
 10 files changed, 343 insertions(+), 7 deletions(-)

diff --git a/be/src/vec/exec/format/text/text_reader.cpp b/be/src/vec/exec/format/text/text_reader.cpp
index cf33623d320..7913a9bdb2b 100644
--- a/be/src/vec/exec/format/text/text_reader.cpp
+++ b/be/src/vec/exec/format/text/text_reader.cpp
@@ -21,6 +21,9 @@
 #include <gen_cpp/Types_types.h>
 #include <glog/logging.h>

+#include <cstddef>
+#include <vector>
+
 #include "common/compiler_util.h" // IWYU pragma: keep
 #include "common/status.h"
 #include "exec/line_reader.h"
@@ -39,6 +42,15 @@ namespace doris::vectorized {
 #include "common/compile_check_begin.h"

 void HiveTextFieldSplitter::do_split(const Slice& line, std::vector<Slice>* splitted_values) {
+    if (_value_sep_len == 1) {
+        _split_field_single_char(line, splitted_values);
+    } else {
+        _split_field_multi_char(line, splitted_values);
+    }
+}
+
+void HiveTextFieldSplitter::_split_field_single_char(const Slice& line,
+                                                     std::vector<Slice>* splitted_values) {
     const char* data = line.data;
     const size_t size = line.size;
     size_t value_start = 0;
@@ -55,6 +67,50 @@ void HiveTextFieldSplitter::do_split(const Slice& line, std::vector<Slice>* splitted_values) {
     process_value_func(data, value_start, size - value_start, _trimming_char, splitted_values);
 }

+void HiveTextFieldSplitter::_split_field_multi_char(const Slice& line,
+                                                    std::vector<Slice>* splitted_values) {
+    const char* data = line.data;
+    const size_t size = line.size;
+    size_t start = 0;
+
+    std::vector<int> next(_value_sep_len);
+    next[0] = -1;
+    for (int i = 1, j = -1; i < (int)_value_sep_len; i++) {
+        while (j >= 0 && _value_sep[i] != _value_sep[j + 1]) {
+            j = next[j];
+        }
+        if (_value_sep[i] == _value_sep[j + 1]) {
+            j++;
+        }
+        next[i] = j;
+    }
+
+    // KMP search
+    for (int i = 0, j = -1; i < (int)size; i++) {
+        while (j >= 0 && data[i] != _value_sep[j + 1]) {
+            j = next[j];
+        }
+        if (data[i] == _value_sep[j + 1]) {
+            j++;
+        }
+        if (j == (int)_value_sep_len - 1) {
+            size_t curpos = i - _value_sep_len + 1;
+            if (_escape_char != 0 && curpos > 0 && data[curpos - 1] == _escape_char) {
+                j = next[j];
+                continue;
+            }
+
+            if (curpos >= start) {
+                process_value_func(data, start, curpos - start, _trimming_char, splitted_values);
+                start = curpos + _value_sep_len;
+            }
+
+            j = next[j];
+        }
+    }
+    process_value_func(data, start, size - start, _trimming_char, splitted_values);
+}
+
 TextReader::TextReader(RuntimeState* state, RuntimeProfile* profile,
                        ScannerCounter* counter, const TFileScanRangeParams& params,
                        const TFileRangeDesc& range, const std::vector<SlotDescriptor*>& file_slot_descs, io::IOContext* io_ctx)
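The _split_field_multi_char() function added above is a textbook KMP scan: it builds the failure table `next` for the separator pattern, walks the line once, and emits a field each time a complete separator match ends, skipping matches that are escaped or that overlap a separator already consumed. A minimal standalone sketch of the same strategy (illustrative only: it returns std::string copies instead of Slices and omits the trimming done by process_value_func):

    #include <cstddef>
    #include <string>
    #include <vector>

    // sep is assumed non-empty; the splitter only takes this path when
    // sep.size() > 1.
    std::vector<std::string> split_multi_char(const std::string& line, const std::string& sep,
                                              char escape_char = 0) {
        std::vector<std::string> fields;
        const size_t sep_len = sep.size();
        size_t start = 0;

        // next[i]: largest j such that sep[0..j] is a proper suffix of
        // sep[0..i], or -1 when there is none.
        std::vector<int> next(sep_len);
        next[0] = -1;
        for (int i = 1, j = -1; i < (int)sep_len; i++) {
            while (j >= 0 && sep[i] != sep[j + 1]) j = next[j];
            if (sep[i] == sep[j + 1]) j++;
            next[i] = j;
        }

        for (int i = 0, j = -1; i < (int)line.size(); i++) {
            while (j >= 0 && line[i] != sep[j + 1]) j = next[j];
            if (line[i] == sep[j + 1]) j++;
            if (j == (int)sep_len - 1) {         // a full separator ends at i
                size_t curpos = i - sep_len + 1; // where that separator starts
                if (escape_char != 0 && curpos > 0 && line[curpos - 1] == escape_char) {
                    j = next[j];                 // escaped: keep it inside the field
                    continue;
                }
                if (curpos >= start) {           // drop matches overlapping the last one
                    fields.emplace_back(line, start, curpos - start);
                    start = curpos + sep_len;
                }
                j = next[j];
            }
        }
        fields.emplace_back(line, start, line.size() - start);
        return fields;
    }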
diff --git a/be/src/vec/exec/format/text/text_reader.h b/be/src/vec/exec/format/text/text_reader.h
index c1a873de5f2..88d6746a19f 100644
--- a/be/src/vec/exec/format/text/text_reader.h
+++ b/be/src/vec/exec/format/text/text_reader.h
@@ -43,6 +43,9 @@ public:
     void do_split(const Slice& line, std::vector<Slice>* splitted_values);

 private:
+    void _split_field_single_char(const Slice& line, std::vector<Slice>* splitted_values);
+    void _split_field_multi_char(const Slice& line, std::vector<Slice>* splitted_values);
+
     std::string _value_sep;
     char _escape_char;
 };
diff --git a/be/test/vec/exec/format/text/hive_text_field_splitter_test.cpp b/be/test/vec/exec/format/text/hive_text_field_splitter_test.cpp
new file mode 100644
index 00000000000..814af4554e4
--- /dev/null
+++ b/be/test/vec/exec/format/text/hive_text_field_splitter_test.cpp
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <vector>
+
+#include "vec/exec/format/text/text_reader.h"
+
+namespace doris::vectorized {
+
+class HiveTextFieldSplitterTest : public testing::Test {
+protected:
+    void verify_field_split(const std::string& input, const std::string& delimiter,
+                            const std::vector<std::string>& expected_fields, char escape_char = 0) {
+        HiveTextFieldSplitter splitter(false, false, delimiter, delimiter.size(), 0, escape_char);
+        Slice line(input.data(), input.size());
+        std::vector<Slice> splitted_values;
+
+        splitter.do_split(line, &splitted_values);
+
+        ASSERT_EQ(expected_fields.size(), splitted_values.size())
+                << "Input: " << input << ", Delimiter: " << delimiter;
+
+        for (size_t i = 0; i < expected_fields.size(); ++i) {
+            std::string actual(splitted_values[i].data, splitted_values[i].size);
+            EXPECT_EQ(expected_fields[i], actual) << "Field " << i << " mismatch. Input: " << input
+                                                  << ", Delimiter: " << delimiter;
+        }
+    }
+};
+
+// Test single character delimiter (basic functionality)
+TEST_F(HiveTextFieldSplitterTest, SingleCharDelimiter) {
+    verify_field_split("a,b,c", ",", {"a", "b", "c"});
+    verify_field_split("1|2|3|4", "|", {"1", "2", "3", "4"});
+    verify_field_split("", ",", {""});
+    verify_field_split(",", ",", {"", ""});
+    verify_field_split("a,", ",", {"a", ""});
+    verify_field_split(",b", ",", {"", "b"});
+}
+
+// Test multi-character delimiter (core functionality for MultiDelimitSerDe)
+TEST_F(HiveTextFieldSplitterTest, MultiCharDelimiter) {
+    verify_field_split("a||b||c", "||", {"a", "b", "c"});
+    verify_field_split("1|+|2|+|3", "|+|", {"1", "2", "3"});
+    verify_field_split("field1|+|field2|+|field3", "|+|", {"field1", "field2", "field3"});
+
+    verify_field_split("", "||", {""});
+    verify_field_split("||", "||", {"", ""});
+    verify_field_split("a||", "||", {"a", ""});
+    verify_field_split("||b", "||", {"", "b"});
+}
+
+// Test overlapping patterns in delimiter - these are the problematic cases
+TEST_F(HiveTextFieldSplitterTest, OverlappingPatterns) {
+    verify_field_split("ab\\ababab", "abab", {"ab\\", "ab"});
+
+    verify_field_split("aaaaaaa", "aaa", {"", "", "a"});
+
+    verify_field_split("abcabcabc", "abcabc", {"", "abc"});
+
+    verify_field_split("ababababab", "abab", {"", "", "ab"});
+}
+
+// Test escape character functionality
+TEST_F(HiveTextFieldSplitterTest, EscapeCharacter) {
+    verify_field_split("a\\,b,c", ",", {"a\\,b", "c"}, '\\');
+    verify_field_split("a\\||b||c", "||", {"a\\||b", "c"}, '\\');
+    verify_field_split("field1\\|+|field2|+|field3", "|+|", {"field1\\|+|field2", "field3"}, '\\');
+}
+
+// Test real-world scenarios
+TEST_F(HiveTextFieldSplitterTest, RealWorldScenarios) {
+    verify_field_split("1|+|100|+|test1", "|+|", {"1", "100", "test1"});
+    verify_field_split("u...@domain.com|+|John Doe|+|Manager", "|+|",
+                       {"u...@domain.com", "John Doe", "Manager"});
+    verify_field_split("|+||+|", "|+|", {"", "", ""});
+    verify_field_split("a|+||+|c", "|+|", {"a", "", "c"});
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
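Running the sketch above on the overlapping-pattern inputs from this test file reproduces the expected splits. A hypothetical driver (assumes split_multi_char() from the earlier sketch is in scope):

    #include <cassert>

    int main() {
        // The scanner restarts after each accepted separator, so "aaa"
        // consumes positions 0-2 and 3-5 of "aaaaaaa", leaving "a".
        assert((split_multi_char("aaaaaaa", "aaa") ==
                std::vector<std::string> {"", "", "a"}));
        assert((split_multi_char("ababababab", "abab") ==
                std::vector<std::string> {"", "", "ab"}));
        // An escaped separator stays inside its field.
        assert((split_multi_char("a\\||b||c", "||", '\\') ==
                std::vector<std::string> {"a\\||b", "c"}));
        return 0;
    }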
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/regression/multi_delimit_serde/create_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/regression/multi_delimit_serde/create_table.hql
new file mode 100644
index 00000000000..cdaead8edf9
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/regression/multi_delimit_serde/create_table.hql
@@ -0,0 +1,78 @@
+CREATE DATABASE IF NOT EXISTS regression;
+USE regression;
+
+CREATE TABLE `multi_delimit_test`(
+  `k1` int,
+  `k2` int,
+  `name` string)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.MultiDelimitSerDe'
+WITH SERDEPROPERTIES (
+  'field.delim'='|+|',
+  'mapkey.delim'='@',
+  'collection.delim'=':',
+  'serialization.format'='1',
+  'serialization.encoding'='UTF-8')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION '/user/doris/suites/regression/multi_delimit_test'
+TBLPROPERTIES (
+  'transient_lastDdlTime'='1692719456');
+
+CREATE TABLE `multi_delimit_test2`(
+  `id` int,
+  `value` double,
+  `description` string)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.MultiDelimitSerDe'
+WITH SERDEPROPERTIES (
+  'field.delim'='||',
+  'serialization.format'='1')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION '/user/doris/suites/regression/multi_delimit_test2'
+TBLPROPERTIES (
+  'transient_lastDdlTime'='1692719456');
+
+-- Test table with array and map types to test collection.delim and mapkey.delim
+CREATE TABLE `multi_delimit_complex_test`(
+  `id` int,
+  `name` string,
+  `tags` array<string>,
+  `properties` map<string,string>,
+  `nested_array` array<array<int>>)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.MultiDelimitSerDe'
+WITH SERDEPROPERTIES (
+  'field.delim'='|+|',
+  'mapkey.delim'='@',
+  'collection.delim'=':',
+  'serialization.format'='1',
+  'serialization.encoding'='UTF-8')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION '/user/doris/suites/regression/multi_delimit_complex_test'
+TBLPROPERTIES (
+  'transient_lastDdlTime'='1692719456');
+
+INSERT INTO multi_delimit_test VALUES
+  (1, 100, 'test1'),
+  (2, 200, 'test2'),
+  (3, 300, 'test3');
+
+INSERT INTO multi_delimit_test2 VALUES
+  (1, 1.5, 'description1'),
+  (2, 2.5, 'description2'),
+  (3, 3.5, 'description3');
+
+-- Insert test data with complex types
+-- Format: id|+|name|+|array_elements:separated:by:colon|+|key1@value1:key2@value2|+|nested_array_format
+INSERT INTO multi_delimit_complex_test VALUES
+  (1, 'user1', array('tag1', 'tag2', 'tag3'), map('key1', 'value1', 'key2', 'value2'), array(array(1, 2), array(3, 4))),
+  (2, 'user2', array('tagA', 'tagB'), map('prop1', 'val1'), array(array(5, 6)));
\ No newline at end of file
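For reference, with the delimiters declared above the rows of the two simple tables are stored as plain text lines in which the multi-character field.delim appears verbatim between fields, e.g.

    1|+|100|+|test1
    2|+|200|+|test2
    1||1.5||description1

(the complex-type layout is sketched in the comment inside the script). Undoing exactly this layout is what the BE splitter above implements.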
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
index f59143961e6..eec4349669c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
@@ -98,6 +98,7 @@ public class HiveMetaStoreClientHelper {
     public static final String OPENX_JSON_SERDE = "org.openx.data.jsonserde.JsonSerDe";
     public static final String HIVE_TEXT_SERDE = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe";
     public static final String HIVE_CSV_SERDE = "org.apache.hadoop.hive.serde2.OpenCSVSerde";
+    public static final String HIVE_MULTI_DELIMIT_SERDE = "org.apache.hadoop.hive.serde2.MultiDelimitSerDe";

     public enum HiveFileFormat {
         TEXT_FILE(0, "text"),
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java
index bdc8e0cacd9..1be78e41b89 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java
@@ -81,11 +81,16 @@ public class HiveProperties {
             PROP_SKIP_FOOTER_COUNT);

     public static String getFieldDelimiter(Table table) {
+        return getFieldDelimiter(table, false);
+    }
+
+    public static String getFieldDelimiter(Table table, boolean supportMultiChar) {
         // This method is used for text format.
         Optional<String> fieldDelim = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_FIELD_DELIMITER);
         Optional<String> serFormat = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_SERIALIZATION_FORMAT);
-        return HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
-                DEFAULT_FIELD_DELIMITER, fieldDelim, serFormat));
+        String delimiter = HiveMetaStoreClientHelper.firstPresentOrDefault(
+                DEFAULT_FIELD_DELIMITER, fieldDelim, serFormat);
+        return supportMultiChar ? delimiter : HiveMetaStoreClientHelper.getByte(delimiter);
     }

     public static String getSeparatorChar(Table table) {
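The effect of the new supportMultiChar flag: the resolution order (field.delim, then serialization.format, then the default) is unchanged, but a multi-character delimiter such as "|+|" is now returned verbatim instead of being forced through getByte(). A rough C++ model (the helper name to_single_byte and the single-byte reduction are illustrative assumptions, not the actual FE code):

    #include <optional>
    #include <string>

    // Stand-in for HiveMetaStoreClientHelper.getByte(); the real helper
    // reduces the property to one byte (e.g. serialization.format "1" is
    // Hive shorthand for the byte 0x01). Simplified here.
    std::string to_single_byte(const std::string& s) { return s.substr(0, 1); }

    std::string get_field_delimiter(const std::optional<std::string>& field_delim,
                                    const std::optional<std::string>& serialization_format,
                                    bool support_multi_char) {
        const std::string kDefault = "\x01"; // assumed default, Hive's ^A
        std::string delim = field_delim ? *field_delim
                            : serialization_format ? *serialization_format
                                                   : kDefault;
        // MultiDelimitSerDe tables keep the delimiter intact, e.g. "|+|".
        return support_multi_char ? delim : to_single_byte(delim);
    }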
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index e096ee30e72..0b316efaa3a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -431,6 +431,8 @@ public class HiveScanNode extends FileQueryScanNode {
             type = TFileFormatType.FORMAT_TEXT;
         } else if (serDeLib.equals(HiveMetaStoreClientHelper.HIVE_CSV_SERDE)) {
             type = TFileFormatType.FORMAT_CSV_PLAIN;
+        } else if (serDeLib.equals(HiveMetaStoreClientHelper.HIVE_MULTI_DELIMIT_SERDE)) {
+            type = TFileFormatType.FORMAT_TEXT;
         } else {
             throw new UserException("Unsupported hive table serde: " + serDeLib);
         }
@@ -451,11 +453,13 @@ public class HiveScanNode extends FileQueryScanNode {
         // TODO: support skip footer count
         fileAttributes.setSkipLines(HiveProperties.getSkipHeaderCount(table));
         String serDeLib = table.getSd().getSerdeInfo().getSerializationLib();
-        if (serDeLib.equals("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) {
+        if (serDeLib.equals("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")
+                || serDeLib.equals(HiveMetaStoreClientHelper.HIVE_MULTI_DELIMIT_SERDE)) {
             TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();
-            // set properties of LazySimpleSerDe
-            // 1. set column separator
-            textParams.setColumnSeparator(HiveProperties.getFieldDelimiter(table));
+            // set properties of LazySimpleSerDe and MultiDelimitSerDe
+            // 1. set column separator (MultiDelimitSerDe supports multi-character delimiters)
+            boolean supportMultiChar = serDeLib.equals(HiveMetaStoreClientHelper.HIVE_MULTI_DELIMIT_SERDE);
+            textParams.setColumnSeparator(HiveProperties.getFieldDelimiter(table, supportMultiChar));
             // 2. set line delimiter
             textParams.setLineDelimiter(HiveProperties.getLineDelimiter(table));
             // 3. set mapkv delimiter
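With this change a MultiDelimitSerDe table goes down the same scan path as LazySimpleSerDe: the FE maps the serde to FORMAT_TEXT and fills TFileTextScanRangeParams as before, except that the column separator is forwarded as the complete multi-character string ("|+|" or "||" for the regression tables above) rather than a single byte, which is what routes those lines to the BE's _split_field_multi_char().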
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java
index bb4786f226a..fdbc1ffc948 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java
@@ -25,6 +25,7 @@ import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.util.LocationPath;
 import org.apache.doris.datasource.hive.HMSExternalCatalog;
 import org.apache.doris.datasource.hive.HMSExternalTable;
+import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper;
 import org.apache.doris.datasource.hive.HiveProperties;
 import org.apache.doris.nereids.trees.plans.commands.insert.HiveInsertCommandContext;
 import org.apache.doris.nereids.trees.plans.commands.insert.InsertCommandContext;
@@ -59,6 +60,7 @@ public class HiveTableSink extends BaseExternalTableDataSink {
         add(TFileFormatType.FORMAT_CSV_PLAIN);
         add(TFileFormatType.FORMAT_ORC);
         add(TFileFormatType.FORMAT_PARQUET);
+        add(TFileFormatType.FORMAT_TEXT);
     }};

     public HiveTableSink(HMSExternalTable targetTable) {
@@ -175,6 +177,7 @@ public class HiveTableSink extends BaseExternalTableDataSink {
                 compressType = targetTable.getRemoteTable().getParameters().get("parquet.compression");
                 break;
             case FORMAT_CSV_PLAIN:
+            case FORMAT_TEXT:
                 compressType = targetTable.getRemoteTable().getParameters().get("text.compression");
                 if (Strings.isNullOrEmpty(compressType)) {
                     compressType = ConnectContext.get().getSessionVariable().hiveTextCompression();
@@ -213,8 +216,13 @@ public class HiveTableSink extends BaseExternalTableDataSink {
     private void setSerDeProperties(THiveTableSink tSink) {
         THiveSerDeProperties serDeProperties = new THiveSerDeProperties();
         Table table = targetTable.getRemoteTable();
+        String serDeLib = table.getSd().getSerdeInfo().getSerializationLib();
         // 1. set field delimiter
-        serDeProperties.setFieldDelim(HiveProperties.getFieldDelimiter(table));
+        if (HiveMetaStoreClientHelper.HIVE_MULTI_DELIMIT_SERDE.equals(serDeLib)) {
+            serDeProperties.setFieldDelim(HiveProperties.getFieldDelimiter(table, true));
+        } else {
+            serDeProperties.setFieldDelim(HiveProperties.getFieldDelimiter(table));
+        }
         // 2. set line delimiter
         serDeProperties.setLineDelim(HiveProperties.getLineDelimiter(table));
         // 3. set collection delimiter
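This is the write-side counterpart of the scan change: FORMAT_TEXT becomes an accepted sink format, it reuses the text.compression table property (falling back to the session-level Hive text compression setting), and when the target table declares MultiDelimitSerDe the full multi-character field delimiter is handed to the writer, so rows written by Doris round-trip through the scan path above. The regression suite below exercises exactly this with INSERT statements.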
diff --git a/regression-test/data/external_table_p0/hive/test_multi_delimit_serde.out b/regression-test/data/external_table_p0/hive/test_multi_delimit_serde.out
new file mode 100644
index 00000000000..f2bac26a2a1
Binary files /dev/null and b/regression-test/data/external_table_p0/hive/test_multi_delimit_serde.out differ
diff --git a/regression-test/suites/external_table_p0/hive/test_multi_delimit_serde.groovy b/regression-test/suites/external_table_p0/hive/test_multi_delimit_serde.groovy
new file mode 100644
index 00000000000..8823a169ff5
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hive/test_multi_delimit_serde.groovy
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_multi_delimit_serde", "p0,external,hive,external_docker,external_docker_hive") {
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("disable Hive test.")
+        return;
+    }
+
+    for (String hivePrefix : ["hive2", "hive3"]) {
+        String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort")
+        String catalog_name = "${hivePrefix}_test_multi_delimit_serde"
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+        sql """drop catalog if exists ${catalog_name}"""
+        sql """create catalog if not exists ${catalog_name} properties (
+            "type"="hms",
+            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}'
+        );"""
+
+        logger.info("catalog " + catalog_name + " created")
+        sql """switch ${catalog_name};"""
+        logger.info("switched to catalog " + catalog_name)
+
+        sql """use regression;"""
+
+        try {
+            // Test 1: MultiDelimitSerDe with |+| delimiter - using pre-created table
+            qt_01 """SELECT * FROM multi_delimit_test ORDER BY k1"""
+
+            // Test 2: Different multi-character delimiter - using pre-created table
+            qt_02 """SELECT * FROM multi_delimit_test2 ORDER BY id"""
+
+            // Test 3: Complex types with array and map to test collection.delim and mapkey.delim
+            logger.info("Test 3: Using pre-created table with array and map types")
+            qt_03 """SELECT id, name, tags, properties FROM multi_delimit_complex_test ORDER BY id"""
+
+            // Test 4: Insert data using Doris to write to Hive MultiDelimitSerDe tables
+            logger.info("Test 4: Testing Doris INSERT to Hive MultiDelimitSerDe tables")
+
+            // Test 4.1: Insert to basic multi-delimit table
+            sql """INSERT INTO multi_delimit_test VALUES (4, 400, 'test4'), (5, 500, 'test5')"""
+            qt_04 """SELECT * FROM multi_delimit_test WHERE k1 >= 4 ORDER BY k1"""
+
+            // Test 4.2: Insert to double-pipe delimited table
+            sql """INSERT INTO multi_delimit_test2 VALUES (4, 4.5, 'description4'), (5, 5.5, 'description5')"""
+            qt_05 """SELECT * FROM multi_delimit_test2 WHERE id >= 4 ORDER BY id"""
+
+            // Test 4.3: Insert to complex types table with arrays and maps
+            sql """INSERT INTO multi_delimit_complex_test VALUES
+                   (3, 'user3', ARRAY('tagX', 'tagY'), MAP('newkey', 'newvalue'), ARRAY(ARRAY(7, 8)))"""
+            qt_06 """SELECT id, name, tags, properties FROM multi_delimit_complex_test WHERE id = 3 ORDER BY id"""
+
+            // Test 5: Show create table to check SerDe properties
+            logger.info("Test 5: Checking show create table")
+            def createTableResult = sql """SHOW CREATE TABLE multi_delimit_test"""
+            logger.info("Create table result: " + createTableResult.toString())
+
+            assertTrue(createTableResult.toString().contains("MultiDelimitSerDe"))
+            assertTrue(createTableResult.toString().contains("field.delim"))
+        } catch (Exception e) {
+            logger.warn("Test failed, this might be expected if Hive version doesn't support MultiDelimitSerDe: " + e.getMessage())
+            if (e.getMessage().contains("Unsupported hive table serde")) {
+                logger.info("Got expected 'Unsupported hive table serde' error before implementing MultiDelimitSerDe support")
+            }
+        }
+        sql """drop catalog if exists ${catalog_name}"""
+    }
+}
\ No newline at end of file

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org