This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 88dd480c2e3 [enhancement](CSV-reader)  enhance err log for csv reading containing enclose or escape (#25816)
88dd480c2e3 is described below

commit 88dd480c2e3b5af8a1191a729ddd118711de4452
Author: Siyang Tang <[email protected]>
AuthorDate: Tue Oct 24 22:10:08 2023 +0800

    [enhancement](CSV-reader)  enhance err log for csv reading containing enclose or escape (#25816)
---
 be/src/http/action/stream_load.cpp        | 10 ++++++++++
 be/src/vec/exec/format/csv/csv_reader.cpp | 20 +++++++++++++++-----
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/be/src/http/action/stream_load.cpp b/be/src/http/action/stream_load.cpp
index 55541843241..99a98afbc37 100644
--- a/be/src/http/action/stream_load.cpp
+++ b/be/src/http/action/stream_load.cpp
@@ -398,9 +398,19 @@ Status StreamLoadAction::_process_put(HttpRequest* 
http_req,
         request.__set_line_delimiter(http_req->header(HTTP_LINE_DELIMITER));
     }
     if (!http_req->header(HTTP_ENCLOSE).empty() && 
http_req->header(HTTP_ENCLOSE).size() > 0) {
+        const auto& enclose_str = http_req->header(HTTP_ENCLOSE);
+        if (enclose_str.length() != 1) {
+            return Status::InvalidArgument("enclose must be single-char, 
actually is {}",
+                                           enclose_str);
+        }
         request.__set_enclose(http_req->header(HTTP_ENCLOSE)[0]);
     }
     if (!http_req->header(HTTP_ESCAPE).empty() && 
http_req->header(HTTP_ESCAPE).size() > 0) {
+        const auto& escape_str = http_req->header(HTTP_ESCAPE);
+        if (escape_str.length() != 1) {
+            return Status::InvalidArgument("escape must be single-char, 
actually is {}",
+                                           escape_str);
+        }
         request.__set_escape(http_req->header(HTTP_ESCAPE)[0]);
     }
     if (!http_req->header(HTTP_PARTITIONS).empty()) {
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp
index 5cb29ed4e77..2d7c116a34e 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -737,11 +737,21 @@ Status CsvReader::_line_split_to_values(const Slice& 
line, bool* success) {
                         fmt::format_to(error_msg, "{} {} {}",
                                        "actual column number in csv file is ", 
cmp_str,
                                        " schema column number.");
-                        fmt::format_to(error_msg, "actual number: {}, column 
separator: [{}], ",
-                                       _split_values.size(), _value_separator);
-                        fmt::format_to(error_msg,
-                                       "line delimiter: [{}], schema column 
number: {}; ",
-                                       _line_delimiter, 
_file_slot_descs.size());
+                        fmt::format_to(error_msg, "actual number: {}, schema 
column number: {}; ",
+                                       _split_values.size(), 
_file_slot_descs.size());
+                        fmt::format_to(error_msg, "line delimiter: [{}], 
column separator: [{}], ",
+                                       _line_delimiter, _value_separator);
+                        if (_enclose != 0) {
+                            fmt::format_to(error_msg, "enclose:[{}] ", 
_enclose);
+                        }
+                        if (_escape != 0) {
+                            fmt::format_to(error_msg, "escape:[{}] ", _escape);
+                        }
+                        fmt::memory_buffer values;
+                        for (const auto& value : _split_values) {
+                            fmt::format_to(values, "{}, ", value.to_string());
+                        }
+                        fmt::format_to(error_msg, "result values:[{}]", 
fmt::to_string(values));
                         return fmt::to_string(error_msg);
                     },
                     &_line_reader_eof));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to