This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new e4a83a22d14 [opt](error msg) Make data codec error clearly when load
csv data can't display (#25540)
e4a83a22d14 is described below
commit e4a83a22d144df387789eb3e6b1501ed897c20e6
Author: YueW <[email protected]>
AuthorDate: Wed Oct 18 16:12:22 2023 +0800
[opt](error msg) Make data codec error clearly when load csv data can't
display (#25540)
Co-authored-by: Tanya-W <tanya1218w@163.com>
---
be/src/vec/exec/format/csv/csv_reader.cpp | 8 ++-
.../stream_load/csv_with_none_utf8_data.csv | 4 ++
.../test_csv_with_none_utf8_data.groovy | 73 ++++++++++++++++++++++
3 files changed, 82 insertions(+), 3 deletions(-)
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp
b/be/src/vec/exec/format/csv/csv_reader.cpp
index 2382cb92de4..ea16234e016 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -703,10 +703,12 @@ Status CsvReader::_validate_line(const Slice& line, bool*
success) {
return Status::InternalError("Only support csv data in utf8
codec");
} else {
RETURN_IF_ERROR(_state->append_error_msg_to_file(
- []() -> std::string { return "Unable to display"; },
- []() -> std::string {
+ [&]() -> std::string { return std::string(line.data,
line.size); },
+ [&]() -> std::string {
fmt::memory_buffer error_msg;
- fmt::format_to(error_msg, "{}", "Unable to display");
+ fmt::format_to(error_msg, "{}{}",
+ "Unable to display, only support csv
data in utf8 codec",
+ ", please check the data encoding");
return fmt::to_string(error_msg);
},
&_line_reader_eof));
diff --git
a/regression-test/data/load_p0/stream_load/csv_with_none_utf8_data.csv
b/regression-test/data/load_p0/stream_load/csv_with_none_utf8_data.csv
new file mode 100644
index 00000000000..86d326d0c62
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/csv_with_none_utf8_data.csv
@@ -0,0 +1,4 @@
+123abc2022-12-012022-12-01:09:30:31
+233��ǰ���¹⣬���ǵ���˪2022-12-012022-12-01:09:30:31
+343efg2022-12-012022-12-01:09:30:31
+453��� ��̫��2022-12-012022-12-01:09:30:31
diff --git
a/regression-test/suites/load_p0/stream_load/test_csv_with_none_utf8_data.groovy
b/regression-test/suites/load_p0/stream_load/test_csv_with_none_utf8_data.groovy
new file mode 100644
index 00000000000..bca699f7433
--- /dev/null
+++
b/regression-test/suites/load_p0/stream_load/test_csv_with_none_utf8_data.groovy
@@ -0,0 +1,73 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_csv_with_none_utf8_data", "p0") {
+ def tableName = "test_csv_with_none_utf8_data"
+
+ // create table
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName} (
+ `k1` int(20) NULL,
+ `k2` bigint(20) NULL,
+ `v1` tinyint(4) NULL,
+ `v2` text NULL,
+ `v3` date NULL,
+ `v4` datetime NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`k1`, `k2`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`k1`, `k2`) BUCKETS 3
+ PROPERTIES ("replication_allocation" = "tag.location.default: 1");
+ """
+
+ streamLoad {
+ table "${tableName}"
+
+ set 'column_separator', '\\x01'
+
+ file 'csv_with_none_utf8_data.csv'
+
+ // stream load action will check result, include Success status, and
NumberTotalRows == NumberLoadedRows
+
+ // if declared a check callback, the default check condition will
ignore.
+ // So you must check all condition
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ def (code, out, err) = curl("GET", json.ErrorURL)
+ log.info("error result: " + out)
+ def checkError = out.contains("Unable to display, only support csv
data in utf8 codec")
+ assertTrue(checkError)
+ assertEquals("fail", json.Status.toLowerCase())
+ assertTrue(json.Message.contains("too many filtered rows"))
+ assertEquals(4, json.NumberTotalRows)
+ assertEquals(2, json.NumberLoadedRows)
+ assertEquals(2, json.NumberFilteredRows)
+ assertTrue(json.LoadBytes > 0)
+ log.info("url: " + json.ErrorURL)
+ }
+ }
+
+
+ // drop table
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]