This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 88dd480c2e3 [enhancement](CSV-reader) enhance err log for csv reading
containing enclose or escape (#25816)
88dd480c2e3 is described below
commit 88dd480c2e3b5af8a1191a729ddd118711de4452
Author: Siyang Tang <[email protected]>
AuthorDate: Tue Oct 24 22:10:08 2023 +0800
[enhancement](CSV-reader) enhance err log for csv reading containing
enclose or escape (#25816)
---
be/src/http/action/stream_load.cpp | 10 ++++++++++
be/src/vec/exec/format/csv/csv_reader.cpp | 20 +++++++++++++++-----
2 files changed, 25 insertions(+), 5 deletions(-)
diff --git a/be/src/http/action/stream_load.cpp b/be/src/http/action/stream_load.cpp
index 55541843241..99a98afbc37 100644
--- a/be/src/http/action/stream_load.cpp
+++ b/be/src/http/action/stream_load.cpp
@@ -398,9 +398,19 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req,
request.__set_line_delimiter(http_req->header(HTTP_LINE_DELIMITER));
}
if (!http_req->header(HTTP_ENCLOSE).empty() &&
http_req->header(HTTP_ENCLOSE).size() > 0) {
+ const auto& enclose_str = http_req->header(HTTP_ENCLOSE);
+ if (enclose_str.length() != 1) {
+ return Status::InvalidArgument("enclose must be single-char,
actually is {}",
+ enclose_str);
+ }
request.__set_enclose(http_req->header(HTTP_ENCLOSE)[0]);
}
if (!http_req->header(HTTP_ESCAPE).empty() &&
http_req->header(HTTP_ESCAPE).size() > 0) {
+ const auto& escape_str = http_req->header(HTTP_ESCAPE);
+ if (escape_str.length() != 1) {
+ return Status::InvalidArgument("escape must be single-char,
actually is {}",
+ escape_str);
+ }
request.__set_escape(http_req->header(HTTP_ESCAPE)[0]);
}
if (!http_req->header(HTTP_PARTITIONS).empty()) {
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp
index 5cb29ed4e77..2d7c116a34e 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -737,11 +737,21 @@ Status CsvReader::_line_split_to_values(const Slice& line, bool* success) {
fmt::format_to(error_msg, "{} {} {}",
"actual column number in csv file is ",
cmp_str,
" schema column number.");
- fmt::format_to(error_msg, "actual number: {}, column
separator: [{}], ",
- _split_values.size(), _value_separator);
- fmt::format_to(error_msg,
- "line delimiter: [{}], schema column
number: {}; ",
- _line_delimiter,
_file_slot_descs.size());
+ fmt::format_to(error_msg, "actual number: {}, schema
column number: {}; ",
+ _split_values.size(),
_file_slot_descs.size());
+ fmt::format_to(error_msg, "line delimiter: [{}],
column separator: [{}], ",
+ _line_delimiter, _value_separator);
+ if (_enclose != 0) {
+ fmt::format_to(error_msg, "enclose:[{}] ",
_enclose);
+ }
+ if (_escape != 0) {
+ fmt::format_to(error_msg, "escape:[{}] ", _escape);
+ }
+ fmt::memory_buffer values;
+ for (const auto& value : _split_values) {
+ fmt::format_to(values, "{}, ", value.to_string());
+ }
+ fmt::format_to(error_msg, "result values:[{}]",
fmt::to_string(values));
return fmt::to_string(error_msg);
},
&_line_reader_eof));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]