This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 44c4a45f72 [fix](array-type) fix the wrong data when use stream load
to import '\N' (#12102)
44c4a45f72 is described below
commit 44c4a45f72f0be84ce49e9b92954daeabb65dc6c
Author: carlvinhust2012 <[email protected]>
AuthorDate: Mon Aug 29 09:53:37 2022 +0800
[fix](array-type) fix the wrong data when use stream load to import '\N'
(#12102)
Co-authored-by: hucheng01 <[email protected]>
---
be/src/exprs/cast_functions.cpp | 3 +
be/src/vec/functions/function_cast.h | 5 ++
.../data/load_p0/broker_load/simple_array.data | 3 +
.../data/load_p0/broker_load/test_array_load.out | 6 ++
.../load_p0/broker_load/test_array_load.groovy | 69 ++++++++++++++++++++--
5 files changed, 80 insertions(+), 6 deletions(-)
diff --git a/be/src/exprs/cast_functions.cpp b/be/src/exprs/cast_functions.cpp
index 9885d9941e..d2d887ac61 100644
--- a/be/src/exprs/cast_functions.cpp
+++ b/be/src/exprs/cast_functions.cpp
@@ -810,6 +810,9 @@ DateTimeV2Val
CastFunctions::cast_to_datetimev2_val(FunctionContext* ctx, const
}
CollectionVal CastFunctions::cast_to_array_val(FunctionContext* context, const
StringVal& val) {
+ if (val.is_null) {
+ return CollectionVal::null();
+ }
CollectionVal array_val;
Status status = ArrayParser::parse(array_val, context, val);
return status.ok() ? array_val : CollectionVal::null();
diff --git a/be/src/vec/functions/function_cast.h
b/be/src/vec/functions/function_cast.h
index 11d8d7ed44..0ed17714b4 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -325,6 +325,11 @@ struct ConvertImplGenericFromString {
for (size_t i = 0; i < size; ++i) {
const auto& val = col_from_string->get_data_at(i);
+ // Note: here we should handle the null element
+ if (val.size == 0) {
+ col_to->insert_default();
+ continue;
+ }
ReadBuffer read_buffer((char*)(val.data), val.size);
RETURN_IF_ERROR(data_type_to->from_string(read_buffer,
col_to));
}
diff --git a/regression-test/data/load_p0/broker_load/simple_array.data
b/regression-test/data/load_p0/broker_load/simple_array.data
new file mode 100644
index 0000000000..7501722c69
--- /dev/null
+++ b/regression-test/data/load_p0/broker_load/simple_array.data
@@ -0,0 +1,3 @@
+1/[1,2,3,4,5]/[32767,32768,32769]/[65534,65535,65536]/["a","b","c","d","e"]/["hello","world"]/["1991-01-01"]/["1991-01-01
00:00:00"]/[0.33,0.67]/[3.1415926,0.878787878]/[1,1.2,1.3]
+2/[1,2,3,4,5]/[32767,32768,32769]/[65534,65535,65536]/["a","b","c","d","e"]/["hello","world"]/\N/\N/\N/\N/[1,\N,1.3]
+3/\N/\N/\N/\N/\N/\N/\N/\N/\N/\N
\ No newline at end of file
diff --git a/regression-test/data/load_p0/broker_load/test_array_load.out
b/regression-test/data/load_p0/broker_load/test_array_load.out
index da94539584..0dfbd74d4f 100644
--- a/regression-test/data/load_p0/broker_load/test_array_load.out
+++ b/regression-test/data/load_p0/broker_load/test_array_load.out
@@ -31,6 +31,12 @@
5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a',
'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01
00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a',
'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00]
[0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67]
+-- !select --
+1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a',
'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01
00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+2 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a',
'b', 'c', 'd', 'e'] ['hello', 'world'] \N \N \N \N
[1, NULL, 1.3]
+3 \N \N \N \N \N \N \N \N \N
\N
+100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a',
'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00]
[0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67]
+
-- !select --
1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a',
'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01
00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536]
['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01]
[1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1,
1.2, 1.3]
diff --git a/regression-test/suites/load_p0/broker_load/test_array_load.groovy
b/regression-test/suites/load_p0/broker_load/test_array_load.groovy
index d585515c98..048e133522 100644
--- a/regression-test/suites/load_p0/broker_load/test_array_load.groovy
+++ b/regression-test/suites/load_p0/broker_load/test_array_load.groovy
@@ -18,6 +18,7 @@
suite("test_array_load", "p0") {
// define a sql table
def testTable = "tbl_test_array_load"
+ def testTable01 = "tbl_test_array_load01"
def create_test_table = {testTablex, enable_vectorized_flag ->
// multi-line sql
@@ -64,11 +65,52 @@ suite("test_array_load", "p0") {
assertTrue(result2[0][0] == 1, "Insert should update 1 rows")
}
- def load_array_data = {strip_flag, read_flag, format_flag, exprs,
json_paths,
+ def create_test_table01 = {testTablex ->
+ // multi-line sql
+ sql "ADMIN SET FRONTEND CONFIG ('enable_array_type' = 'true')"
+
+ def result1 = sql """
+ CREATE TABLE IF NOT EXISTS ${testTable01} (
+ `k1` INT(11) NULL COMMENT "",
+ `k2` ARRAY<SMALLINT> NULL COMMENT "",
+ `k3` ARRAY<INT(11)> NULL COMMENT "",
+ `k4` ARRAY<BIGINT> NULL COMMENT "",
+ `k5` ARRAY<CHAR> NULL COMMENT "",
+ `k6` ARRAY<VARCHAR(20)> NULL COMMENT "",
+ `k7` ARRAY<DATE> NULL COMMENT "",
+ `k8` ARRAY<DATETIME> NULL COMMENT "",
+ `k9` ARRAY<FLOAT> NULL COMMENT "",
+ `k10` ARRAY<DOUBLE> NULL COMMENT "",
+ `k11` ARRAY<DECIMAL(20, 6)> NULL COMMENT ""
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`k1`)
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2"
+ )
+ """
+
+ // DDL/DML return 1 row and 1 column, the only value is update row
count
+ assertTrue(result1.size() == 1)
+ assertTrue(result1[0].size() == 1)
+ assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
+
+ // insert 1 row to check whether the table is ok
+ def result2 = sql """ INSERT INTO ${testTable01} VALUES
+ (100, [1, 2, 3], [32767, 32768, 32769], [65534, 65535,
65536], ['a', 'b', 'c'], ["hello", "world"],
+ ['2022-07-13'], ['2022-07-13 12:30:00'], [0.33, 0.67],
[3.1415926, 0.878787878], [4, 5.5, 6.67])
+ """
+ assertTrue(result2.size() == 1)
+ assertTrue(result2[0].size() == 1)
+ assertTrue(result2[0][0] == 1, "Insert should update 1 rows")
+ }
+
+ def load_array_data = {table_name, strip_flag, read_flag, format_flag,
exprs, json_paths,
json_root, where_expr, fuzzy_flag, column_sep,
file_name ->
// load the json data
streamLoad {
- table "tbl_test_array_load"
+ table table_name
// set http request header params
set 'strip_outer_array', strip_flag
@@ -160,7 +202,7 @@ suite("test_array_load", "p0") {
create_test_table.call(testTable, true)
- load_array_data.call('true', '', 'json', '', '', '', '', '', '',
'simple_array.json')
+ load_array_data.call(testTable, 'true', '', 'json', '', '', '', '',
'', '', 'simple_array.json')
// select the table and check whether the data is correct
qt_select "select * from ${testTable} order by k1"
@@ -175,7 +217,7 @@ suite("test_array_load", "p0") {
create_test_table.call(testTable, false)
- load_array_data.call('true', '', 'json', '', '', '', '', '', '',
'simple_array.json')
+ load_array_data.call(testTable, 'true', '', 'json', '', '', '', '',
'', '', 'simple_array.json')
// select the table and check whether the data is correct
qt_select "select * from ${testTable} order by k1"
@@ -190,7 +232,7 @@ suite("test_array_load", "p0") {
create_test_table.call(testTable, true)
- load_array_data.call('true', '', 'csv', '', '', '', '', '', '/',
'simple_array.csv')
+ load_array_data.call(testTable, 'true', '', 'csv', '', '', '', '', '',
'/', 'simple_array.csv')
// select the table and check whether the data is correct
qt_select "select * from ${testTable} order by k1"
@@ -205,7 +247,7 @@ suite("test_array_load", "p0") {
create_test_table.call(testTable, false)
- load_array_data.call('true', '', 'csv', '', '', '', '', '', '/',
'simple_array.csv')
+ load_array_data.call(testTable, 'true', '', 'csv', '', '', '', '', '',
'/', 'simple_array.csv')
// select the table and check whether the data is correct
qt_select "select * from ${testTable} order by k1"
@@ -214,6 +256,21 @@ suite("test_array_load", "p0") {
try_sql("DROP TABLE IF EXISTS ${testTable}")
}
+ // case5: import array data without specifying the format
+ try {
+ sql "DROP TABLE IF EXISTS ${testTable01}"
+
+ create_test_table01.call(testTable01)
+
+ load_array_data.call(testTable01, '', '', '', '', '', '', '', '', '/',
'simple_array.data')
+
+ // select the table and check whether the data is correct
+ qt_select "select * from ${testTable01} order by k1"
+
+ } finally {
+ try_sql("DROP TABLE IF EXISTS ${testTable01}")
+ }
+
// if 'enableHdfs' in regression-conf.groovy has been set to true,
// the test will run these case as below.
if (enableHdfs()) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]