This is an automated email from the ASF dual-hosted git repository.
ic4y pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/seatunnel.git
The following commit(s) were added to refs/heads/dev by this push:
new 7345f27a9c [Feature][connectors-v2]File support quote char and escape char (#10185)
7345f27a9c is described below
commit 7345f27a9c8aab7e2db05fbce1681b75d1dbc4f1
Author: 老王 <[email protected]>
AuthorDate: Tue Jan 6 12:07:18 2026 +0800
[Feature][connectors-v2]File support quote char and escape char (#10185)
---
docs/en/connector-v2/source/CosFile.md | 10 ++
docs/en/connector-v2/source/FtpFile.md | 10 ++
docs/en/connector-v2/source/HdfsFile.md | 16 +-
docs/en/connector-v2/source/LocalFile.md | 13 +-
docs/en/connector-v2/source/ObsFile.md | 2 +
docs/en/connector-v2/source/OssFile.md | 10 ++
docs/en/connector-v2/source/OssJindoFile.md | 10 ++
docs/en/connector-v2/source/S3File.md | 10 ++
docs/en/connector-v2/source/SftpFile.md | 14 +-
docs/zh/connector-v2/source/CosFile.md | 10 ++
docs/zh/connector-v2/source/FtpFile.md | 10 ++
docs/zh/connector-v2/source/HdfsFile.md | 12 +-
docs/zh/connector-v2/source/LocalFile.md | 10 ++
docs/zh/connector-v2/source/ObsFile.md | 35 +++--
docs/zh/connector-v2/source/OssFile.md | 162 +++++++++++----------
docs/zh/connector-v2/source/OssJindoFile.md | 46 +++---
docs/zh/connector-v2/source/S3File.md | 102 +++++++------
docs/zh/connector-v2/source/SftpFile.md | 2 +
.../file/config/FileBaseSourceOptions.java | 14 ++
.../file/source/reader/AbstractReadStrategy.java | 3 +
.../file/source/reader/CsvReadStrategy.java | 22 ++-
.../file/source/reader/CsvReadStrategyTest.java | 50 +++++++
.../csv/special_quote_char_break_line.csv | 6 +
.../file/cos/source/CosFileSourceFactory.java | 2 +
.../file/ftp/source/FtpFileSourceFactory.java | 2 +
.../file/hdfs/source/HdfsFileSourceFactory.java | 2 +
.../oss/jindo/source/OssFileSourceFactory.java | 2 +
.../file/local/source/LocalFileSourceFactory.java | 2 +
.../file/obs/source/ObsFileSourceFactory.java | 2 +
.../file/oss/source/OssFileSourceFactory.java | 2 +
.../file/s3/source/S3FileSourceFactory.java | 2 +
.../file/sftp/source/SftpFileSourceFactory.java | 2 +
32 files changed, 424 insertions(+), 173 deletions(-)
diff --git a/docs/en/connector-v2/source/CosFile.md b/docs/en/connector-v2/source/CosFile.md
index 819b867ec8..a4c2fa5ef4 100644
--- a/docs/en/connector-v2/source/CosFile.md
+++ b/docs/en/connector-v2/source/CosFile.md
@@ -83,6 +83,8 @@ To use this connector you need put hadoop-cos-{hadoop.version}-{version}.jar and
| common-options | | no | - |
| file_filter_modified_start | string | no | - |
| file_filter_modified_end | string | no | - |
+| quote_char | string | no | " |
+| escape_char | string | no | - |
### path [string]
@@ -417,6 +419,14 @@ File modification time filter. The connector will filter some files based on the
File modification time filter. The connector will filter some files based on the last modification end time (not including end time). The default data format is `yyyy-MM-dd HH:mm:ss`.
+### quote_char [string]
+
+A single character that encloses CSV fields, allowing fields with commas, line breaks, or quotes to be read correctly.
+
+### escape_char [string]
+
+A single character that allows the quote or other special characters to appear inside a CSV field without ending the field.
+
### common options
Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details.
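To see what these two options mean in practice, here is a minimal sketch using Apache Commons CSV, the parser behind the connector's CsvReadStrategy (see the code changes further below). The class name and sample data are illustrative only, not part of the commit:

```java
import java.io.StringReader;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class DefaultQuoteDemo {
    public static void main(String[] args) throws Exception {
        // Same base format the connector builds: EXCEL + ignore empty lines,
        // here with the default quote_char of '"'.
        CSVFormat format =
                CSVFormat.EXCEL.builder()
                        .setIgnoreEmptyLines(true)
                        .setDelimiter(',')
                        .setQuote('"')
                        .build();
        // The second field holds a comma and a line break, yet parses as one field.
        String csv = "1,\"Potter, Harry\n(Gryffindor)\",18\n";
        try (CSVParser parser = CSVParser.parse(new StringReader(csv), format)) {
            for (CSVRecord record : parser) {
                System.out.println(record.get(1)); // Potter, Harry\n(Gryffindor)
            }
        }
    }
}
```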
diff --git a/docs/en/connector-v2/source/FtpFile.md b/docs/en/connector-v2/source/FtpFile.md
index 46389a6668..70a6e5cb66 100644
--- a/docs/en/connector-v2/source/FtpFile.md
+++ b/docs/en/connector-v2/source/FtpFile.md
@@ -79,6 +79,8 @@ If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you
| common-options | | no | - |
| file_filter_modified_start | string | no | - |
| file_filter_modified_end | string | no | - |
+| quote_char | string | no | " |
+| escape_char | string | no | - |
### host [string]
@@ -440,6 +442,14 @@ File modification time filter. The connector will filter some files based on the
File modification time filter. The connector will filter some files based on the last modification end time (not including end time). The default data format is `yyyy-MM-dd HH:mm:ss`.
+### quote_char [string]
+
+A single character that encloses CSV fields, allowing fields with commas, line breaks, or quotes to be read correctly.
+
+### escape_char [string]
+
+A single character that allows the quote or other special characters to appear inside a CSV field without ending the field.
+
### common options
Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details.
diff --git a/docs/en/connector-v2/source/HdfsFile.md b/docs/en/connector-v2/source/HdfsFile.md
index a5895112d6..2ebb6de0d4 100644
--- a/docs/en/connector-v2/source/HdfsFile.md
+++ b/docs/en/connector-v2/source/HdfsFile.md
@@ -75,7 +75,7 @@ Read data from hdfs file system.
| file_filter_pattern | string | no | | Filter pattern, which is used for filtering files. |
| filename_extension | string | no | - | Filter filename extension, which is used for filtering files with specific extension. Example: `csv` `.txt` `json` `.xml`. |
| compress_codec | string | no | none | The compress codec of files |
-| archive_compress_codec | string | no | none |
+| archive_compress_codec | string | no | none | |
| encoding | string | no | UTF-8 | |
| null_format | string | no | - | Only used when file_format_type is text. null_format to define which strings can be represented as null. e.g: `\N` |
| binary_chunk_size | int | no | 1024 | Only used when file_format_type is binary. The chunk size (in bytes) for reading binary files. Default is 1024 bytes. Larger values may improve performance for large files but use more memory. |
@@ -83,6 +83,8 @@ Read data from hdfs file system.
| common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details. |
| file_filter_modified_start | string | no | - | File modification time filter. The connector will filter some files based on the last modification start time (including start time). The default data format is `yyyy-MM-dd HH:mm:ss`. |
| file_filter_modified_end | string | no | - | File modification time filter. The connector will filter some files based on the last modification end time (not including end time). The default data format is `yyyy-MM-dd HH:mm:ss`. |
+| quote_char | string | no | " | A single character that encloses CSV fields, allowing fields with commas, line breaks, or quotes to be read correctly. |
+| escape_char | string | no | - | A single character that allows the quote or other special characters to appear inside a CSV field without ending the field. |
### file_format_type [string]
@@ -183,8 +185,8 @@ The compress codec of files and the details that supported as the following show
The compress codec of archive files and the details that supported as the following shown:
-| archive_compress_codec | file_format | archive_compress_suffix |
-|------------------------|-------------------|-------------------------|
+| archive_compress_codec | file_format | archive_compress_suffix |
+|------------------------|--------------------|-------------------------|
| ZIP | txt,json,excel,xml | .zip |
| TAR | txt,json,excel,xml | .tar |
| TAR_GZ | txt,json,excel,xml | .tar.gz |
@@ -210,6 +212,14 @@ Only used when file_format_type is binary.
Whether to read the complete file as a single chunk instead of splitting into chunks. When enabled, the entire file content will be read into memory at once. Default is false.
+### quote_char [string]
+
+A single character that encloses CSV fields, allowing fields with commas, line breaks, or quotes to be read correctly.
+
+### escape_char [string]
+
+A single character that allows the quote or other special characters to appear inside a CSV field without ending the field.
+
### Tips
> If you use spark/flink, In order to use this connector, You must ensure your
> spark/flink cluster already integrated hadoop. The tested hadoop version is
> 2.x. If you use SeaTunnel Engine, It automatically integrated the hadoop jar
> when you download and install SeaTunnel Engine. You can check the jar
> package under ${SEATUNNEL_HOME}/lib to confirm this.
diff --git a/docs/en/connector-v2/source/LocalFile.md b/docs/en/connector-v2/source/LocalFile.md
index f0397c621d..e6de4f3033 100644
--- a/docs/en/connector-v2/source/LocalFile.md
+++ b/docs/en/connector-v2/source/LocalFile.md
@@ -81,8 +81,9 @@ If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you
| file_filter_modified_start | string | no | - |
| file_filter_modified_end | string | no | - |
| enable_file_split | boolean | no | false |
-| file_split_size | long | no | 134217728 |
-
+| file_split_size | long | no | 134217728 |
+| quote_char | string | no | " |
+| escape_char | string | no | - |
### path [string]
The source file path.
@@ -425,6 +426,14 @@ Turn on the file splitting function, the default is false。It can be selected w
File split size, which can be filled in when the enable_file_split parameter is true. The unit is the number of bytes. The default value is the number of bytes of 128MB, which is 134217728.
+### quote_char [string]
+
+A single character that encloses CSV fields, allowing fields with commas, line breaks, or quotes to be read correctly.
+
+### escape_char [string]
+
+A single character that allows the quote or other special characters to appear
inside a CSV field without ending the field.
+
### common options
Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details
diff --git a/docs/en/connector-v2/source/ObsFile.md b/docs/en/connector-v2/source/ObsFile.md
index ddb7389903..88c824649c 100644
--- a/docs/en/connector-v2/source/ObsFile.md
+++ b/docs/en/connector-v2/source/ObsFile.md
@@ -84,6 +84,8 @@ It only supports hadoop version **2.9.X+**.
| sheet_name | string | no | - | Read the sheet of the workbook, only used when file_format is excel. |
| file_filter_modified_start | string | no | - | File modification time filter. The connector will filter some files based on the last modification start time (including start time). The default data format is `yyyy-MM-dd HH:mm:ss`. |
| file_filter_modified_end | string | no | - | File modification time filter. The connector will filter some files based on the last modification end time (not including end time). The default data format is `yyyy-MM-dd HH:mm:ss`. |
+| quote_char | string | no | " | A single character that encloses CSV fields, allowing fields with commas, line breaks, or quotes to be read correctly. |
+| escape_char | string | no | - | A single character that allows the quote or other special characters to appear inside a CSV field without ending the field. |
### Tips
diff --git a/docs/en/connector-v2/source/OssFile.md b/docs/en/connector-v2/source/OssFile.md
index 1bcdf9fa8d..7ad66f67a6 100644
--- a/docs/en/connector-v2/source/OssFile.md
+++ b/docs/en/connector-v2/source/OssFile.md
@@ -215,6 +215,8 @@ If you assign file type to `parquet` `orc`, schema option not required, connecto
| common-options | config | no | - | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details. |
| file_filter_modified_start | string | no | - | File modification time filter. The connector will filter some files based on the last modification start time (including start time). The default data format is `yyyy-MM-dd HH:mm:ss`. |
| file_filter_modified_end | string | no | - | File modification time filter. The connector will filter some files based on the last modification end time (not including end time). The default data format is `yyyy-MM-dd HH:mm:ss`. |
+| quote_char | string | no | " | A single character that encloses CSV fields, allowing fields with commas, line breaks, or quotes to be read correctly. |
+| escape_char | string | no | - | A single character that allows the quote or other special characters to appear inside a CSV field without ending the field. |
### file_format_type [string]
@@ -263,6 +265,14 @@ Only used when file_format_type is binary.
Whether to read the complete file as a single chunk instead of splitting into chunks. When enabled, the entire file content will be read into memory at once. Default is false.
+### quote_char [string]
+
+A single character that encloses CSV fields, allowing fields with commas, line breaks, or quotes to be read correctly.
+
+### escape_char [string]
+
+A single character that allows the quote or other special characters to appear inside a CSV field without ending the field.
+
### file_filter_pattern [string]
Filter pattern, which is used for filtering files. If you only want to filter based on file names, simply write the regular file names; if you want to filter based on the file directory at the same time, the expression needs to start with `path`.
diff --git a/docs/en/connector-v2/source/OssJindoFile.md b/docs/en/connector-v2/source/OssJindoFile.md
index 221a22fa0f..93c513b75e 100644
--- a/docs/en/connector-v2/source/OssJindoFile.md
+++ b/docs/en/connector-v2/source/OssJindoFile.md
@@ -85,6 +85,8 @@ It only supports hadoop version **2.9.X+**.
| common-options | | no | - |
| file_filter_modified_start | string | no | - |
| file_filter_modified_end | string | no | - |
+| quote_char | string | no | " |
+| escape_char | string | no | - |
### path [string]
@@ -398,6 +400,14 @@ File modification time filter. The connector will filter some files based on the
File modification time filter. The connector will filter some files based on the last modification end time (not including end time). The default data format is `yyyy-MM-dd HH:mm:ss`.
+### quote_char [string]
+
+A single character that encloses CSV fields, allowing fields with commas, line breaks, or quotes to be read correctly.
+
+### escape_char [string]
+
+A single character that allows the quote or other special characters to appear inside a CSV field without ending the field.
+
### common options
Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details.
diff --git a/docs/en/connector-v2/source/S3File.md b/docs/en/connector-v2/source/S3File.md
index be3b5112cd..54a23d3756 100644
--- a/docs/en/connector-v2/source/S3File.md
+++ b/docs/en/connector-v2/source/S3File.md
@@ -222,6 +222,8 @@ If you assign file type to `parquet` `orc`, schema option not required, connecto
| file_filter_pattern | string | no | | Filter pattern, which is used for filtering files. [...]
| filename_extension | string | no | - | Filter filename extension, which is used for filtering files with specific extension. Example: `csv` `.txt` `json` `.xml`. [...]
| common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details. [...]
+| quote_char | string | no | " | A single character that encloses CSV fields, allowing fields with commas, line breaks, or quotes to be read correctly. [...]
+| escape_char | string | no | - | A single character that allows the quote or other special characters to appear inside a CSV field without ending the field. [...]
### file_format_type [string]
@@ -349,6 +351,14 @@ Only used when file_format_type is binary.
Whether to read the complete file as a single chunk instead of splitting into chunks. When enabled, the entire file content will be read into memory at once. Default is false.
+### quote_char [string]
+
+A single character that encloses CSV fields, allowing fields with commas, line breaks, or quotes to be read correctly.
+
+### escape_char [string]
+
+A single character that allows the quote or other special characters to appear inside a CSV field without ending the field.
+
## Example
1. In this example, we read data from s3 path `s3a://seatunnel-test/seatunnel/text` and the file type is orc in this path.
diff --git a/docs/en/connector-v2/source/SftpFile.md b/docs/en/connector-v2/source/SftpFile.md
index 980ad9ae33..d19d5799ed 100644
--- a/docs/en/connector-v2/source/SftpFile.md
+++ b/docs/en/connector-v2/source/SftpFile.md
@@ -102,14 +102,16 @@ The File does not have a specific type list, and we can indicate which SeaTunnel
| csv_use_header_line | boolean | no | false | Whether to use the header line to parse the file, only used when the file_format is `csv` and the file contains the header line that matches RFC 4180 |
| schema | Config | No | - | Please check #schema below |
| compress_codec | String | No | None | The compress codec of files and the details that supported as the following shown: <br/> - txt: `lzo` `None` <br/> - json: `lzo` `None` <br/> - csv: `lzo` `None` <br/> - orc: `lzo` `snappy` `lz4` `zlib` `None` <br/> - parquet: `lzo` `snappy` `lz4` `gzip` `brotli` `zstd` `None` <br/> Tips: excel type does not support any compression format |
-| archive_compress_codec | string | no | none |
-| encoding | string | no | UTF-8 |
+| archive_compress_codec | string | no | none | |
+| encoding | string | no | UTF-8 | |
| null_format | string | no | - | Only used when file_format_type is text. null_format to define which strings can be represented as null. e.g: `\N` |
| binary_chunk_size | int | no | 1024 | Only used when file_format_type is binary. The chunk size (in bytes) for reading binary files. Default is 1024 bytes. Larger values may improve performance for large files but use more memory. |
| binary_complete_file_mode | boolean | no | false | Only used when file_format_type is binary. Whether to read the complete file as a single chunk instead of splitting into chunks. When enabled, the entire file content will be read into memory at once. Default is false. |
| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details. |
| file_filter_modified_start | string | no | - | File modification time filter. The connector will filter some files based on the last modification start time (including start time). The default data format is `yyyy-MM-dd HH:mm:ss`. |
| file_filter_modified_end | string | no | - | File modification time filter. The connector will filter some files based on the last modification end time (not including end time). The default data format is `yyyy-MM-dd HH:mm:ss`. |
+| quote_char | string | no | " | A single character that encloses CSV fields, allowing fields with commas, line breaks, or quotes to be read correctly. |
+| escape_char | string | no | - | A single character that allows the quote or other special characters to appear inside a CSV field without ending the field. |
### file_filter_pattern [string]
@@ -290,6 +292,14 @@ Only used when file_format_type is binary.
Whether to read the complete file as a single chunk instead of splitting into chunks. When enabled, the entire file content will be read into memory at once. Default is false.
+### quote_char [string]
+
+A single character that encloses CSV fields, allowing fields with commas, line breaks, or quotes to be read correctly.
+
+### escape_char [string]
+
+A single character that allows the quote or other special characters to appear inside a CSV field without ending the field.
+
### schema [config]
#### fields [Config]
diff --git a/docs/zh/connector-v2/source/CosFile.md b/docs/zh/connector-v2/source/CosFile.md
index e45cdcffa4..bb4b38d9ef 100644
--- a/docs/zh/connector-v2/source/CosFile.md
+++ b/docs/zh/connector-v2/source/CosFile.md
@@ -82,6 +82,8 @@ import ChangeLog from '../changelog/connector-file-cos.md';
| common-options | | 否 | - |
| file_filter_modified_start | string | 否 | - |
| file_filter_modified_end | string | 否 | - |
+| quote_char | string | 否 | " |
+| escape_char | string | 否 | - |
### path [string]
@@ -411,6 +413,14 @@ abc.*
按照最后修改时间过滤文件。 要过滤的结束时间(不包括改时间),时间格式是:`yyyy-MM-dd HH:mm:ss`。
+### quote_char [string]
+
+用于包裹 CSV 字段的单字符,可保证包含逗号、换行符或引号的字段被正确解析。
+
+### escape_char [string]
+
+用于在 CSV 字段内转义引号或其他特殊字符,使其不会结束字段。
+
### common options
源插件常用参数,详见[源端通用选项](../Source-common-Options.md)。
diff --git a/docs/zh/connector-v2/source/FtpFile.md b/docs/zh/connector-v2/source/FtpFile.md
index 54a48370cb..a5b32bcd01 100644
--- a/docs/zh/connector-v2/source/FtpFile.md
+++ b/docs/zh/connector-v2/source/FtpFile.md
@@ -75,6 +75,8 @@ import ChangeLog from '../changelog/connector-file-ftp.md';
| common-options | | 否 | - |
| file_filter_modified_start | string | 否 | - |
| file_filter_modified_end | string | 否 | - |
+| quote_char | string | 否 | " |
+| escape_char | string | 否 | - |
### host [string]
@@ -410,6 +412,14 @@ SeaTunnel 将从源文件中跳过前 2 行。
按照最后修改时间过滤文件。 要过滤的结束时间(不包括改时间),时间格式是:`yyyy-MM-dd HH:mm:ss`。
+### quote_char [string]
+
+用于包裹 CSV 字段的单字符,可保证包含逗号、换行符或引号的字段被正确解析。
+
+### escape_char [string]
+
+用于在 CSV 字段内转义引号或其他特殊字符,使其不会结束字段。
+
### 通用选项
源插件的通用参数,详情请参考 [源通用选项](../source-common-options.md)。
diff --git a/docs/zh/connector-v2/source/HdfsFile.md b/docs/zh/connector-v2/source/HdfsFile.md
index 655f25192b..05b49ec67f 100644
--- a/docs/zh/connector-v2/source/HdfsFile.md
+++ b/docs/zh/connector-v2/source/HdfsFile.md
@@ -75,7 +75,7 @@ import ChangeLog from '../changelog/connector-file-hadoop.md';
| file_filter_pattern | string | 否 | | 过滤模式,用于过滤文件。 |
| filename_extension | string | 否 | - | 过滤文件扩展名,用于过滤具有特定扩展名的文件。示例:`csv` `.txt` `json` `.xml`。 |
| compress_codec | string | 否 | none | 文件的压缩编解码器 |
-| archive_compress_codec | string | 否 | none |
+| archive_compress_codec | string | 否 | none | |
| encoding | string | 否 | UTF-8 | |
| null_format | string | 否 | - | 仅在 file_format_type 为 text 时使用。null_format 定义哪些字符串可以表示为 null。例如:`\N` |
| binary_chunk_size | int | 否 | 1024 | 仅在 file_format_type 为 binary 时使用。读取二进制文件的块大小(以字节为单位)。默认为 1024 字节。较大的值可能会提高大文件的性能,但会使用更多内存。 |
@@ -83,6 +83,8 @@ import ChangeLog from '../changelog/connector-file-hadoop.md';
| common-options | | 否 | - | 数据源插件通用参数,请参阅 [数据源通用选项](../source-common-options.md) 了解详情。 |
| file_filter_modified_start | string | 否 | - | 按照最后修改时间过滤文件。 要过滤的开始时间(包括改时间),时间格式是:`yyyy-MM-dd HH:mm:ss` |
| file_filter_modified_end | string | 否 | - | 按照最后修改时间过滤文件。 要过滤的结束时间(不包括改时间),时间格式是:`yyyy-MM-dd HH:mm:ss` |
+| quote_char | string | 否 | " | 用于包裹 CSV 字段的单字符,可保证包含逗号、换行符或引号的字段被正确解析。 |
+| escape_char | string | 否 | - | 用于在 CSV 字段内转义引号或其他特殊字符,使其不会结束字段。 |
### file_format_type [string]
@@ -211,6 +213,14 @@ abc.*
是否将完整文件作为单个块读取,而不是分割成块。启用时,整个文件内容将一次性读入内存。默认为 false。
+### quote_char [string]
+
+用于包裹 CSV 字段的单字符,可保证包含逗号、换行符或引号的字段被正确解析。
+
+### escape_char [string]
+
+用于在 CSV 字段内转义引号或其他特殊字符,使其不会结束字段。
+
### 提示
> 如果您使用 spark/flink,为了使用此连接器,您必须确保您的 spark/flink 集群已经集成了 hadoop。测试过的 hadoop
> 版本是 2.x。如果您使用 SeaTunnel Engine,则在下载和安装 SeaTunnel Engine 时会自动集成 hadoop
> jar。您可以检查 `${SEATUNNEL_HOME}/lib` 下的 jar 包来确认这一点。
diff --git a/docs/zh/connector-v2/source/LocalFile.md b/docs/zh/connector-v2/source/LocalFile.md
index 7f0432da4b..0ff85d51e7 100644
--- a/docs/zh/connector-v2/source/LocalFile.md
+++ b/docs/zh/connector-v2/source/LocalFile.md
@@ -82,6 +82,8 @@ import ChangeLog from '../changelog/connector-file-local.md';
| file_filter_modified_end | string | 否 | - |
| enable_file_split | boolean | 否 | false |
| file_split_size | long | 否 | 134217728 |
+| quote_char | string | 否 | " |
+| escape_char | string | 否 | - |
### path [string]
@@ -425,6 +427,14 @@ null_format 定义哪些字符串可以表示为 null。
文件分割大小,enable_file_split参数为true时可以填写。单位是字节数。默认值为128MB的字节数,即134217728。
+### quote_char [string]
+
+用于包裹 CSV 字段的单字符,可保证包含逗号、换行符或引号的字段被正确解析。
+
+### escape_char [string]
+
+用于在 CSV 字段内转义引号或其他特殊字符,使其不会结束字段。
+
### 通用选项
数据源插件通用参数,请参阅 [数据源通用选项](../source-common-options.md) 了解详情
diff --git a/docs/zh/connector-v2/source/ObsFile.md b/docs/zh/connector-v2/source/ObsFile.md
index f66431ac5b..7943f23ab9 100644
--- a/docs/zh/connector-v2/source/ObsFile.md
+++ b/docs/zh/connector-v2/source/ObsFile.md
@@ -62,22 +62,25 @@ import ChangeLog from '../changelog/connector-file-obs.md';
## 选项
-| 参数名 | 类型 | 必须 | 默认值 | 描述 |
-|--------|------|------|--------|------|
-| path | string | 是 | - | 目标目录路径 |
-| file_format_type | string | 是 | - | 文件类型 |
-| bucket | string | 是 | - | OBS 文件系统的桶地址,例如:`obs://obs-bucket-name` |
-| access_key | string | 是 | - | OBS 文件系统的访问密钥 |
-| access_secret | string | 是 | - | OBS 文件系统的访问密钥 |
-| endpoint | string | 是 | - | OBS 文件系统的端点 |
-| read_columns | list | 是 | - | 数据源的读取列列表 |
-| delimiter | string | 否 | \001 | 字段分隔符 |
-| row_delimiter | string | 否 | \n | 行分隔符 |
-| parse_partition_from_path | boolean | 否 | true | 控制是否从文件路径解析分区键和值 |
-| skip_header_row_number | long | 否 | 0 | 跳过前几行,但仅适用于 txt 和 csv。 |
-| date_format | string | 否 | yyyy-MM-dd | 日期类型格式 |
-| datetime_format | string | 否 | yyyy-MM-dd HH:mm:ss | 日期时间类型格式 |
-| time_format | string | 否 | HH:mm:ss | 时间类型格式 |
+| 参数名 | 类型 | 必须 | 默认值 | 描述 |
+|---------------------------|---------|----|---------------------|-----------------------------------------|
+| path | string | 是 | - | 目标目录路径 |
+| file_format_type | string | 是 | - | 文件类型 |
+| bucket | string | 是 | - | OBS 文件系统的桶地址,例如:`obs://obs-bucket-name` |
+| access_key | string | 是 | - | OBS 文件系统的访问密钥 |
+| access_secret | string | 是 | - | OBS 文件系统的访问密钥 |
+| endpoint | string | 是 | - | OBS 文件系统的端点 |
+| read_columns | list | 是 | - | 数据源的读取列列表 |
+| delimiter | string | 否 | \001 | 字段分隔符 |
+| row_delimiter | string | 否 | \n | 行分隔符 |
+| parse_partition_from_path | boolean | 否 | true | 控制是否从文件路径解析分区键和值 |
+| skip_header_row_number | long | 否 | 0 | 跳过前几行,但仅适用于 txt 和 csv。 |
+| date_format | string | 否 | yyyy-MM-dd | 日期类型格式 |
+| datetime_format | string | 否 | yyyy-MM-dd HH:mm:ss | 日期时间类型格式 |
+| time_format | string | 否 | HH:mm:ss | 时间类型格式 |
+| quote_char | string | 否 | " | 用于包裹 CSV 字段的单字符,可保证包含逗号、换行符或引号的字段被正确解析。 |
+| escape_char | string | 否 | - | 用于在 CSV 字段内转义引号或其他特殊字符,使其不会结束字段。 |
+
## 变更日志
diff --git a/docs/zh/connector-v2/source/OssFile.md b/docs/zh/connector-v2/source/OssFile.md
index 25bb1bce9c..d4a09bb94a 100644
--- a/docs/zh/connector-v2/source/OssFile.md
+++ b/docs/zh/connector-v2/source/OssFile.md
@@ -127,93 +127,95 @@ schema {
如果您将文件类型指定为`parquet` `orc`,则不需要schema选项,连接器可以自动找到上游数据的schema。
-| Orc数据类型 | SeaTunnel数据类型 |
-|----------------------------------|----------------------------------------------------------------|
-| BOOLEAN | BOOLEAN |
-| INT | INT |
-| BYTE | BYTE |
-| SHORT | SHORT |
-| LONG | LONG |
-| FLOAT | FLOAT |
-| DOUBLE | DOUBLE |
-| BINARY | BINARY |
-| STRING<br/>VARCHAR<br/>CHAR<br/> | STRING |
-| DATE | LOCAL_DATE_TYPE |
-| TIMESTAMP | LOCAL_DATE_TIME_TYPE |
-| DECIMAL | DECIMAL |
-| LIST(STRING) | STRING_ARRAY_TYPE |
-| LIST(BOOLEAN) | BOOLEAN_ARRAY_TYPE |
-| LIST(TINYINT) | BYTE_ARRAY_TYPE |
-| LIST(SMALLINT) | SHORT_ARRAY_TYPE |
-| LIST(INT) | INT_ARRAY_TYPE |
-| LIST(BIGINT) | LONG_ARRAY_TYPE |
-| LIST(FLOAT) | FLOAT_ARRAY_TYPE |
-| LIST(DOUBLE) | DOUBLE_ARRAY_TYPE |
+| Orc数据类型 | SeaTunnel数据类型 |
+|----------------------------------|-------------------------------|
+| BOOLEAN | BOOLEAN |
+| INT | INT |
+| BYTE | BYTE |
+| SHORT | SHORT |
+| LONG | LONG |
+| FLOAT | FLOAT |
+| DOUBLE | DOUBLE |
+| BINARY | BINARY |
+| STRING<br/>VARCHAR<br/>CHAR<br/> | STRING |
+| DATE | LOCAL_DATE_TYPE |
+| TIMESTAMP | LOCAL_DATE_TIME_TYPE |
+| DECIMAL | DECIMAL |
+| LIST(STRING) | STRING_ARRAY_TYPE |
+| LIST(BOOLEAN) | BOOLEAN_ARRAY_TYPE |
+| LIST(TINYINT) | BYTE_ARRAY_TYPE |
+| LIST(SMALLINT) | SHORT_ARRAY_TYPE |
+| LIST(INT) | INT_ARRAY_TYPE |
+| LIST(BIGINT) | LONG_ARRAY_TYPE |
+| LIST(FLOAT) | FLOAT_ARRAY_TYPE |
+| LIST(DOUBLE) | DOUBLE_ARRAY_TYPE |
| Map<K,V> | MapType,K和V的类型将转换为SeaTunnel类型 |
-| STRUCT | SeaTunnelRowType |
+| STRUCT | SeaTunnelRowType |
### Parquet文件类型
如果您将文件类型指定为`parquet` `orc`,则不需要schema选项,连接器可以自动找到上游数据的schema。
-| Parquet数据类型 | SeaTunnel数据类型 |
-|----------------------|----------------------------------------------------------------|
-| INT_8 | BYTE |
-| INT_16 | SHORT |
-| DATE | DATE |
-| TIMESTAMP_MILLIS | TIMESTAMP |
-| INT64 | LONG |
-| INT96 | TIMESTAMP |
-| BINARY | BYTES |
-| FLOAT | FLOAT |
-| DOUBLE | DOUBLE |
-| BOOLEAN | BOOLEAN |
-| FIXED_LEN_BYTE_ARRAY | TIMESTAMP<br/> DECIMAL |
-| DECIMAL | DECIMAL |
-| LIST(STRING) | STRING_ARRAY_TYPE |
-| LIST(BOOLEAN) | BOOLEAN_ARRAY_TYPE |
-| LIST(TINYINT) | BYTE_ARRAY_TYPE |
-| LIST(SMALLINT) | SHORT_ARRAY_TYPE |
-| LIST(INT) | INT_ARRAY_TYPE |
-| LIST(BIGINT) | LONG_ARRAY_TYPE |
-| LIST(FLOAT) | FLOAT_ARRAY_TYPE |
-| LIST(DOUBLE) | DOUBLE_ARRAY_TYPE |
+| Parquet数据类型 | SeaTunnel数据类型 |
+|----------------------|-------------------------------|
+| INT_8 | BYTE |
+| INT_16 | SHORT |
+| DATE | DATE |
+| TIMESTAMP_MILLIS | TIMESTAMP |
+| INT64 | LONG |
+| INT96 | TIMESTAMP |
+| BINARY | BYTES |
+| FLOAT | FLOAT |
+| DOUBLE | DOUBLE |
+| BOOLEAN | BOOLEAN |
+| FIXED_LEN_BYTE_ARRAY | TIMESTAMP<br/> DECIMAL |
+| DECIMAL | DECIMAL |
+| LIST(STRING) | STRING_ARRAY_TYPE |
+| LIST(BOOLEAN) | BOOLEAN_ARRAY_TYPE |
+| LIST(TINYINT) | BYTE_ARRAY_TYPE |
+| LIST(SMALLINT) | SHORT_ARRAY_TYPE |
+| LIST(INT) | INT_ARRAY_TYPE |
+| LIST(BIGINT) | LONG_ARRAY_TYPE |
+| LIST(FLOAT) | FLOAT_ARRAY_TYPE |
+| LIST(DOUBLE) | DOUBLE_ARRAY_TYPE |
| Map<K,V> | MapType,K和V的类型将转换为SeaTunnel类型 |
-| STRUCT | SeaTunnelRowType |
+| STRUCT | SeaTunnelRowType |
## 选项
-| 名称 | 类型 | 是否必需 | 默认值 | 描述 |
-|----------------------------|---------|------|---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------|
-| path | string | 是 | - | 需要读取的Oss路径,可以有子路径,但子路径需要满足一定的格式要求。具体要求可以参考"parse_partition_from_path"选项 |
-| file_format_type | string | 是 | - | 文件类型,支持以下文件类型:`text` `csv` `parquet` `orc` `json` `excel` `xml` `binary` `markdown` |
-| bucket | string | 是 | - | oss文件系统的bucket地址,例如:`oss://seatunnel-test`。 |
-| endpoint | string | 是 | - | fs oss端点 |
-| read_columns | list | 否 | - | 数据源的读取列列表,用户可以使用它来实现字段投影。支持列投影的文件类型如下所示:`text` `csv` `parquet` `orc` `json` `excel` `xml`。如果用户想在读取`text` `json` `csv`文件时使用此功能,必须配置"schema"选项。 |
-| access_key | string | 否 | - | |
-| access_secret | string | 否 | - | |
-| delimiter | string | 否 | \001 | 字段分隔符,用于告诉连接器在读取文本文件时如何切分字段。默认`\001`,与hive的默认分隔符相同。 |
-| row_delimiter | string | 否 | \n | 行分隔符,用于告诉连接器在读取文本文件时如何切分行。默认`\n`。 |
-| parse_partition_from_path | boolean | 否 | true | 控制是否从文件路径解析分区键和值。例如,如果您从路径`oss://hadoop-cluster/tmp/seatunnel/parquet/name=tyrantlucifer/age=26`读取文件。文件中的每条记录数据都将添加这两个字段:name="tyrantlucifer",age=16 |
-| date_format | string | 否 | yyyy-MM-dd | 日期类型格式,用于告诉连接器如何将字符串转换为日期,支持以下格式:`yyyy-MM-dd` `yyyy.MM.dd` `yyyy/MM/dd`。默认`yyyy-MM-dd` |
+| 名称 | 类型 | 是否必需 | 默认值 | 描述 |
+|----------------------------|---------|------|--------------------|------------------------------------------------------------------------------------------------------------------------------------------------------|
+| path | string | 是 | - | 需要读取的Oss路径,可以有子路径,但子路径需要满足一定的格式要求。具体要求可以参考"parse_partition_from_path"选项 |
+| file_format_type | string | 是 | - | 文件类型,支持以下文件类型:`text` `csv` `parquet` `orc` `json` `excel` `xml` `binary` `markdown` |
+| bucket | string | 是 | - | oss文件系统的bucket地址,例如:`oss://seatunnel-test`。 |
+| endpoint | string | 是 | - | fs oss端点 |
+| read_columns | list | 否 | - | 数据源的读取列列表,用户可以使用它来实现字段投影。支持列投影的文件类型如下所示:`text` `csv` `parquet` `orc` `json` `excel` `xml`。如果用户想在读取`text` `json` `csv`文件时使用此功能,必须配置"schema"选项。 |
+| access_key | string | 否 | - | |
+| access_secret | string | 否 | - | |
+| delimiter | string | 否 | \001 | 字段分隔符,用于告诉连接器在读取文本文件时如何切分字段。默认`\001`,与hive的默认分隔符相同。 |
+| row_delimiter | string | 否 | \n | 行分隔符,用于告诉连接器在读取文本文件时如何切分行。默认`\n`。 |
+| parse_partition_from_path | boolean | 否 | true | 控制是否从文件路径解析分区键和值。例如,如果您从路径`oss://hadoop-cluster/tmp/seatunnel/parquet/name=tyrantlucifer/age=26`读取文件。文件中的每条记录数据都将添加这两个字段:name="tyrantlucifer",age=16 |
+| date_format | string | 否 | yyyy-MM-dd | 日期类型格式,用于告诉连接器如何将字符串转换为日期,支持以下格式:`yyyy-MM-dd` `yyyy.MM.dd` `yyyy/MM/dd`。默认`yyyy-MM-dd` |
 | datetime_format | string | 否 | yyyy-MM-dd HH:mm:ss | 日期时间类型格式,用于告诉连接器如何将字符串转换为日期时间,支持以下格式:`yyyy-MM-dd HH:mm:ss` `yyyy.MM.dd HH:mm:ss` `yyyy/MM/dd HH:mm:ss` `yyyyMMddHHmmss` |
-| time_format | string | 否 | HH:mm:ss | 时间类型格式,用于告诉连接器如何将字符串转换为时间,支持以下格式:`HH:mm:ss` `HH:mm:ss.SSS` |
-| filename_extension | string | 否 | - | 过滤文件名扩展名,用于过滤具有特定扩展名的文件。例如:`csv` `.txt` `json` `.xml`。 |
-| skip_header_row_number | long | 否 | 0 | 跳过前几行,但仅适用于txt和csv。例如,设置如下:`skip_header_row_number = 2`。然后SeaTunnel将跳过源文件的前2行 |
-| csv_use_header_line | boolean | 否 | false | 是否使用标题行来解析文件,仅在file_format为`csv`且文件包含符合RFC 4180的标题行时使用 |
-| schema | config | 否 | - | 上游数据的schema。 |
-| sheet_name | string | 否 | - | 读取工作簿的工作表,仅在file_format为excel时使用。 |
-| xml_row_tag | string | 否 | - | 指定XML文件中数据行的标签名称,仅在file_format为xml时使用。 |
-| xml_use_attr_format | boolean | 否 | - | 指定是否使用标签属性格式处理数据,仅在file_format为xml时使用。 |
-| compress_codec | string | 否 | none | 文件使用的压缩编解码器。 |
-| encoding | string | 否 | UTF-8 |
-| null_format | string | 否 | - | 仅在file_format_type为text时使用。null_format用于定义哪些字符串可以表示为null。例如:`\N` |
-| binary_chunk_size | int | 否 | 1024 | 仅在file_format_type为binary时使用。读取二进制文件的块大小(以字节为单位)。默认为1024字节。较大的值可能会提高大文件的性能,但会使用更多内存。 |
-| binary_complete_file_mode | boolean | 否 | false | 仅在file_format_type为binary时使用。是否将完整文件作为单个块读取,而不是分割成块。启用时,整个文件内容将一次性读入内存。默认为false。 |
-| file_filter_pattern | string | 否 | | 过滤模式,用于过滤文件。 |
-| common-options | config | 否 | - | 数据源插件通用参数,请参考[数据源通用选项](../source-common-options.md)了解详情。 |
-| file_filter_modified_start | string | 否 | - | 按照最后修改时间过滤文件。 要过滤的开始时间(包括改时间),时间格式是:`yyyy-MM-dd HH:mm:ss` |
-| file_filter_modified_end | string | 否 | - | 按照最后修改时间过滤文件。 要过滤的结束时间(不包括改时间),时间格式是:`yyyy-MM-dd HH:mm:ss` |
+| time_format | string | 否 | HH:mm:ss | 时间类型格式,用于告诉连接器如何将字符串转换为时间,支持以下格式:`HH:mm:ss` `HH:mm:ss.SSS` |
+| filename_extension | string | 否 | - | 过滤文件名扩展名,用于过滤具有特定扩展名的文件。例如:`csv` `.txt` `json` `.xml`。 |
+| skip_header_row_number | long | 否 | 0 | 跳过前几行,但仅适用于txt和csv。例如,设置如下:`skip_header_row_number = 2`。然后SeaTunnel将跳过源文件的前2行 |
+| csv_use_header_line | boolean | 否 | false | 是否使用标题行来解析文件,仅在file_format为`csv`且文件包含符合RFC 4180的标题行时使用 |
+| schema | config | 否 | - | 上游数据的schema。 |
+| sheet_name | string | 否 | - | 读取工作簿的工作表,仅在file_format为excel时使用。 |
+| xml_row_tag | string | 否 | - | 指定XML文件中数据行的标签名称,仅在file_format为xml时使用。 |
+| xml_use_attr_format | boolean | 否 | - | 指定是否使用标签属性格式处理数据,仅在file_format为xml时使用。 |
+| compress_codec | string | 否 | none | 文件使用的压缩编解码器。 |
+| encoding | string | 否 | UTF-8 |
+| null_format | string | 否 | - | 仅在file_format_type为text时使用。null_format用于定义哪些字符串可以表示为null。例如:`\N` |
+| binary_chunk_size | int | 否 | 1024 | 仅在file_format_type为binary时使用。读取二进制文件的块大小(以字节为单位)。默认为1024字节。较大的值可能会提高大文件的性能,但会使用更多内存。 |
+| binary_complete_file_mode | boolean | 否 | false | 仅在file_format_type为binary时使用。是否将完整文件作为单个块读取,而不是分割成块。启用时,整个文件内容将一次性读入内存。默认为false。 |
+| file_filter_pattern | string | 否 | | 过滤模式,用于过滤文件。 |
+| common-options | config | 否 | - | 数据源插件通用参数,请参考[数据源通用选项](../source-common-options.md)了解详情。 |
+| file_filter_modified_start | string | 否 | - | 按照最后修改时间过滤文件。 要过滤的开始时间(包括改时间),时间格式是:`yyyy-MM-dd HH:mm:ss` |
+| file_filter_modified_end | string | 否 | - | 按照最后修改时间过滤文件。 要过滤的结束时间(不包括改时间),时间格式是:`yyyy-MM-dd HH:mm:ss` |
+| quote_char | string | 否 | " | 用于包裹 CSV 字段的单字符,可保证包含逗号、换行符或引号的字段被正确解析。 |
+| escape_char | string | 否 | - | 用于在 CSV 字段内转义引号或其他特殊字符,使其不会结束字段。 |
### compress_codec [string]
@@ -262,6 +264,14 @@ markdown 解析器提取各种元素,包括标题、段落、列表、代码
注意:Markdown 格式仅支持读取,不支持写入。
+### quote_char [string]
+
+用于包裹 CSV 字段的单字符,可保证包含逗号、换行符或引号的字段被正确解析。
+
+### escape_char [string]
+
+用于在 CSV 字段内转义引号或其他特殊字符,使其不会结束字段。
+
### file_filter_pattern [string]
文件过滤模式,用于过滤文件。若只想根据文件名称筛选,则直接写文件名称的正则;若同时想根据文件目录进行过滤,则表达式以`path`起始。
diff --git a/docs/zh/connector-v2/source/OssJindoFile.md b/docs/zh/connector-v2/source/OssJindoFile.md
index 2c496a15a2..569d0f0e43 100644
--- a/docs/zh/connector-v2/source/OssJindoFile.md
+++ b/docs/zh/connector-v2/source/OssJindoFile.md
@@ -56,28 +56,30 @@ import ChangeLog from '../changelog/connector-file-oss-jindo.md';
## 选项
-| 参数名 | 类型 | 必须 | 默认值 | 描述 |
-|--------|------|------|--------|------|
-| path | string | 是 | - | 目标目录路径 |
-| file_format_type | string | 是 | - | 文件类型 |
-| bucket | string | 是 | - | OSS 文件系统的桶地址 |
-| access_key | string | 是 | - | OSS 文件系统的访问密钥 |
-| access_secret | string | 是 | - | OSS 文件系统的访问密钥 |
-| endpoint | string | 是 | - | OSS 文件系统的端点 |
-| read_columns | list | 否 | - | 数据源的读取列列表 |
-| delimiter/field_delimiter | string | 否 | \001 for text and , for csv | 字段分隔符 |
-| row_delimiter | string | 否 | \n | 行分隔符 |
-| parse_partition_from_path | boolean | 否 | true | 控制是否从文件路径解析分区键和值 |
-| date_format | string | 否 | yyyy-MM-dd | 日期类型格式 |
-| datetime_format | string | 否 | yyyy-MM-dd HH:mm:ss | 日期时间类型格式 |
-| time_format | string | 否 | HH:mm:ss | 时间类型格式 |
-| skip_header_row_number | long | 否 | 0 | 跳过前几行 |
-| schema | config | 否 | - | 数据模式 |
-| sheet_name | string | 否 | - | Excel 工作表名称 |
-| xml_row_tag | string | 否 | - | XML 行标签 |
-| xml_use_attr_format | boolean | 否 | - | 是否使用 XML 属性格式 |
-| csv_use_header_line | boolean | 否 | false | 是否使用 CSV 标题行 |
-| file_filter_pattern | string | 否 | - | 文件过滤模式 |
+| 参数名 | 类型 | 必须 | 默认值 | 描述 |
+|---------------------------|---------|----|-----------------------------|-----------------------------------------|
+| path | string | 是 | - | 目标目录路径 |
+| file_format_type | string | 是 | - | 文件类型 |
+| bucket | string | 是 | - | OSS 文件系统的桶地址 |
+| access_key | string | 是 | - | OSS 文件系统的访问密钥 |
+| access_secret | string | 是 | - | OSS 文件系统的访问密钥 |
+| endpoint | string | 是 | - | OSS 文件系统的端点 |
+| read_columns | list | 否 | - | 数据源的读取列列表 |
+| delimiter/field_delimiter | string | 否 | \001 for text and , for csv | 字段分隔符 |
+| row_delimiter | string | 否 | \n | 行分隔符 |
+| parse_partition_from_path | boolean | 否 | true | 控制是否从文件路径解析分区键和值 |
+| date_format | string | 否 | yyyy-MM-dd | 日期类型格式 |
+| datetime_format | string | 否 | yyyy-MM-dd HH:mm:ss | 日期时间类型格式 |
+| time_format | string | 否 | HH:mm:ss | 时间类型格式 |
+| skip_header_row_number | long | 否 | 0 | 跳过前几行 |
+| schema | config | 否 | - | 数据模式 |
+| sheet_name | string | 否 | - | Excel 工作表名称 |
+| xml_row_tag | string | 否 | - | XML 行标签 |
+| xml_use_attr_format | boolean | 否 | - | 是否使用 XML 属性格式 |
+| csv_use_header_line | boolean | 否 | false | 是否使用 CSV 标题行 |
+| file_filter_pattern | string | 否 | - | 文件过滤模式 |
+| quote_char | string | 否 | " | 用于包裹 CSV 字段的单字符,可保证包含逗号、换行符或引号的字段被正确解析。 |
+| escape_char | string | 否 | - | 用于在 CSV 字段内转义引号或其他特殊字符,使其不会结束字段。 |
## 变更日志
diff --git a/docs/zh/connector-v2/source/S3File.md b/docs/zh/connector-v2/source/S3File.md
index 9e4f0b9fca..3d36b094e0 100644
--- a/docs/zh/connector-v2/source/S3File.md
+++ b/docs/zh/connector-v2/source/S3File.md
@@ -133,59 +133,59 @@ schema {
如果您将文件类型指定为`parquet` `orc`,则不需要schema选项,连接器可以自动找到上游数据的schema。
-| Orc数据类型 | SeaTunnel数据类型 |
-|----------------------------------|----------------------------------------------------------------|
-| BOOLEAN | BOOLEAN |
-| INT | INT |
-| BYTE | BYTE |
-| SHORT | SHORT |
-| LONG | LONG |
-| FLOAT | FLOAT |
-| DOUBLE | DOUBLE |
-| BINARY | BINARY |
-| STRING<br/>VARCHAR<br/>CHAR<br/> | STRING |
-| DATE | LOCAL_DATE_TYPE |
-| TIMESTAMP | LOCAL_DATE_TIME_TYPE |
-| DECIMAL | DECIMAL |
-| LIST(STRING) | STRING_ARRAY_TYPE |
-| LIST(BOOLEAN) | BOOLEAN_ARRAY_TYPE |
-| LIST(TINYINT) | BYTE_ARRAY_TYPE |
-| LIST(SMALLINT) | SHORT_ARRAY_TYPE |
-| LIST(INT) | INT_ARRAY_TYPE |
-| LIST(BIGINT) | LONG_ARRAY_TYPE |
-| LIST(FLOAT) | FLOAT_ARRAY_TYPE |
-| LIST(DOUBLE) | DOUBLE_ARRAY_TYPE |
+| Orc数据类型 | SeaTunnel数据类型 |
+|----------------------------------|-------------------------------|
+| BOOLEAN | BOOLEAN |
+| INT | INT |
+| BYTE | BYTE |
+| SHORT | SHORT |
+| LONG | LONG |
+| FLOAT | FLOAT |
+| DOUBLE | DOUBLE |
+| BINARY | BINARY |
+| STRING<br/>VARCHAR<br/>CHAR<br/> | STRING |
+| DATE | LOCAL_DATE_TYPE |
+| TIMESTAMP | LOCAL_DATE_TIME_TYPE |
+| DECIMAL | DECIMAL |
+| LIST(STRING) | STRING_ARRAY_TYPE |
+| LIST(BOOLEAN) | BOOLEAN_ARRAY_TYPE |
+| LIST(TINYINT) | BYTE_ARRAY_TYPE |
+| LIST(SMALLINT) | SHORT_ARRAY_TYPE |
+| LIST(INT) | INT_ARRAY_TYPE |
+| LIST(BIGINT) | LONG_ARRAY_TYPE |
+| LIST(FLOAT) | FLOAT_ARRAY_TYPE |
+| LIST(DOUBLE) | DOUBLE_ARRAY_TYPE |
| Map<K,V> | MapType,K和V的类型将转换为SeaTunnel类型 |
-| STRUCT | SeaTunnelRowType |
+| STRUCT | SeaTunnelRowType |
### Parquet文件类型
如果您将文件类型指定为`parquet` `orc`,则不需要schema选项,连接器可以自动找到上游数据的schema。
-| Parquet数据类型 | SeaTunnel数据类型 |
-|----------------------|----------------------------------------------------------------|
-| INT_8 | BYTE |
-| INT_16 | SHORT |
-| DATE | DATE |
-| TIMESTAMP_MILLIS | TIMESTAMP |
-| INT64 | LONG |
-| INT96 | TIMESTAMP |
-| BINARY | BYTES |
-| FLOAT | FLOAT |
-| DOUBLE | DOUBLE |
-| BOOLEAN | BOOLEAN |
-| FIXED_LEN_BYTE_ARRAY | TIMESTAMP<br/> DECIMAL |
-| DECIMAL | DECIMAL |
-| LIST(STRING) | STRING_ARRAY_TYPE |
-| LIST(BOOLEAN) | BOOLEAN_ARRAY_TYPE |
-| LIST(TINYINT) | BYTE_ARRAY_TYPE |
-| LIST(SMALLINT) | SHORT_ARRAY_TYPE |
-| LIST(INT) | INT_ARRAY_TYPE |
-| LIST(BIGINT) | LONG_ARRAY_TYPE |
-| LIST(FLOAT) | FLOAT_ARRAY_TYPE |
-| LIST(DOUBLE) | DOUBLE_ARRAY_TYPE |
+| Parquet数据类型 | SeaTunnel数据类型 |
+|----------------------|-------------------------------|
+| INT_8 | BYTE |
+| INT_16 | SHORT |
+| DATE | DATE |
+| TIMESTAMP_MILLIS | TIMESTAMP |
+| INT64 | LONG |
+| INT96 | TIMESTAMP |
+| BINARY | BYTES |
+| FLOAT | FLOAT |
+| DOUBLE | DOUBLE |
+| BOOLEAN | BOOLEAN |
+| FIXED_LEN_BYTE_ARRAY | TIMESTAMP<br/> DECIMAL |
+| DECIMAL | DECIMAL |
+| LIST(STRING) | STRING_ARRAY_TYPE |
+| LIST(BOOLEAN) | BOOLEAN_ARRAY_TYPE |
+| LIST(TINYINT) | BYTE_ARRAY_TYPE |
+| LIST(SMALLINT) | SHORT_ARRAY_TYPE |
+| LIST(INT) | INT_ARRAY_TYPE |
+| LIST(BIGINT) | LONG_ARRAY_TYPE |
+| LIST(FLOAT) | FLOAT_ARRAY_TYPE |
+| LIST(DOUBLE) | DOUBLE_ARRAY_TYPE |
| Map<K,V> | MapType,K和V的类型将转换为SeaTunnel类型 |
-| STRUCT | SeaTunnelRowType |
+| STRUCT | SeaTunnelRowType |
## 选项
@@ -221,6 +221,8 @@ schema {
| file_filter_pattern | string | 否 | | 过滤模式,用于过滤文件。 |
| filename_extension | string | 否 | - | 过滤文件名扩展名,用于过滤具有特定扩展名的文件。例如:`csv` `.txt` `json` `.xml`。 |
| common-options | | 否 | - | 数据源插件通用参数,请参考[数据源通用选项](../source-common-options.md)了解详情。 |
+| quote_char | string | 否 | " | 用于包裹 CSV 字段的单字符,可保证包含逗号、换行符或引号的字段被正确解析。 |
+| escape_char | string | 否 | - | 用于在 CSV 字段内转义引号或其他特殊字符,使其不会结束字段。 |
### delimiter/field_delimiter [string]
@@ -234,6 +236,14 @@ schema {
默认 `\n`。
+### quote_char [string]
+
+用于包裹 CSV 字段的单字符,可保证包含逗号、换行符或引号的字段被正确解析。
+
+### escape_char [string]
+
+用于在 CSV 字段内转义引号或其他特殊字符,使其不会结束字段。
+
### file_filter_pattern [string]
文件过滤模式,用于过滤文件。若只想根据文件名称筛选,则直接写文件名称的正则;若同时想根据文件目录进行过滤,则表达式以`path`起始。
diff --git a/docs/zh/connector-v2/source/SftpFile.md b/docs/zh/connector-v2/source/SftpFile.md
index 0182f53ba8..7f242e2ce9 100644
--- a/docs/zh/connector-v2/source/SftpFile.md
+++ b/docs/zh/connector-v2/source/SftpFile.md
@@ -110,6 +110,8 @@ import ChangeLog from '../changelog/connector-file-sftp.md';
| common-options | | 否 | - | 数据源插件通用参数,请参考[数据源通用选项](../source-common-options.md)了解详情。 |
| file_filter_modified_start | string | 否 | - | 按照最后修改时间过滤文件。 要过滤的开始时间(包括改时间),时间格式是:`yyyy-MM-dd HH:mm:ss` |
| file_filter_modified_end | string | 否 | - | 按照最后修改时间过滤文件。 要过滤的结束时间(不包括改时间),时间格式是:`yyyy-MM-dd HH:mm:ss` |
+| quote_char | string | 否 | " | 用于包裹 CSV 字段的单字符,可保证包含逗号、换行符或引号的字段被正确解析。 |
+| escape_char | string | 否 | - | 用于在 CSV 字段内转义引号或其他特殊字符,使其不会结束字段。 |
### file_filter_pattern [string]
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/FileBaseSourceOptions.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/FileBaseSourceOptions.java
index f30c8dca2f..a103220c99 100644
--- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/FileBaseSourceOptions.java
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/config/FileBaseSourceOptions.java
@@ -127,4 +127,18 @@ public class FileBaseSourceOptions extends FileBaseOptions {
.withDescription(
"Whether to read the complete file as a single
chunk instead of splitting into chunks. "
+ "When enabled, the entire file content
will be read into memory at once.Only valid when file_format_type is binary.");
+
+ public static final Option<String> QUOTE_CHAR =
+ Options.key("quote_char")
+ .stringType()
+ .defaultValue("\"")
+ .withDescription(
+ "A single character that encloses CSV fields,
allowing fields with commas, line breaks, or quotes to be read correctly.");
+
+ public static final Option<String> ESCAPE_CHAR =
+ Options.key("escape_char")
+ .stringType()
+ .noDefaultValue()
+ .withDescription(
+ "A single character that allows the quote or other
special characters to appear inside a CSV field without ending the field.");
}
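As a quick illustration of how these two options are consumed, here is a minimal sketch that round-trips them through ReadonlyConfig, using the shaded ConfigFactory that the new test below also uses. The QuoteOptionDemo wrapper is hypothetical, not part of the commit:

```java
import java.util.HashMap;
import java.util.Map;

import org.apache.seatunnel.api.configuration.ReadonlyConfig;
import org.apache.seatunnel.connectors.seatunnel.file.config.FileBaseSourceOptions;
import org.apache.seatunnel.shade.com.typesafe.config.ConfigFactory;

public class QuoteOptionDemo {
    public static void main(String[] args) {
        Map<String, Object> options = new HashMap<>();
        options.put(FileBaseSourceOptions.QUOTE_CHAR.key(), "`");
        options.put(FileBaseSourceOptions.ESCAPE_CHAR.key(), "\"");

        ReadonlyConfig config = ReadonlyConfig.fromConfig(ConfigFactory.parseMap(options));
        // quote_char falls back to its default of '"' when unset.
        System.out.println(config.get(FileBaseSourceOptions.QUOTE_CHAR)); // `
        // escape_char has no default, so it is read as an Optional.
        System.out.println(config.getOptional(FileBaseSourceOptions.ESCAPE_CHAR).orElse("<unset>")); // "
    }
}
```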
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/AbstractReadStrategy.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/AbstractReadStrategy.java
index dc1641e5f6..c0e8d2a90f 100644
--- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/AbstractReadStrategy.java
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/AbstractReadStrategy.java
@@ -20,6 +20,7 @@ package org.apache.seatunnel.connectors.seatunnel.file.source.reader;
import org.apache.seatunnel.shade.com.typesafe.config.Config;
import org.apache.seatunnel.shade.org.apache.commons.lang3.StringUtils;
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
import org.apache.seatunnel.api.source.Collector;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.type.BasicType;
@@ -81,6 +82,7 @@ public abstract class AbstractReadStrategy implements ReadStrategy {
protected SeaTunnelRowType seaTunnelRowType;
protected SeaTunnelRowType seaTunnelRowTypeWithPartition;
protected Config pluginConfig;
+ protected ReadonlyConfig readonlyConfig;
protected List<String> fileNames = new ArrayList<>();
protected List<String> readPartitions = new ArrayList<>();
protected List<String> readColumns = new ArrayList<>();
@@ -198,6 +200,7 @@ public abstract class AbstractReadStrategy implements ReadStrategy {
@Override
public void setPluginConfig(Config pluginConfig) {
this.pluginConfig = pluginConfig;
+ this.readonlyConfig = ReadonlyConfig.fromConfig(pluginConfig);
// Determine whether it is a compressed file
if (pluginConfig.hasPath(FileBaseSourceOptions.ARCHIVE_COMPRESS_CODEC.key())) {
String archiveCompressCodec =
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/CsvReadStrategy.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/CsvReadStrategy.java
index 034563f94d..99e6603518 100644
--- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/CsvReadStrategy.java
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/CsvReadStrategy.java
@@ -17,6 +17,8 @@
package org.apache.seatunnel.connectors.seatunnel.file.source.reader;
+import org.apache.seatunnel.shade.org.apache.commons.lang3.StringUtils;
+
import org.apache.seatunnel.api.common.SeaTunnelAPIErrorCode;
import org.apache.seatunnel.api.configuration.ReadonlyConfig;
import org.apache.seatunnel.api.source.Collector;
@@ -115,9 +117,7 @@ public class CsvReadStrategy extends AbstractReadStrategy {
if (enableSplitFile && split.getLength() > -1) {
actualInputStream = safeSlice(inputStream, split.getStart(), split.getLength());
}
- Builder builder =
- CSVFormat.EXCEL.builder().setIgnoreEmptyLines(true).setDelimiter(getDelimiter());
- CSVFormat csvFormat = builder.build();
+ CSVFormat csvFormat = getCSVFormat();
// if enableSplitFile is true,no need to skip
if (!enableSplitFile) {
if (firstLineAsHeader) {
@@ -192,6 +192,20 @@ public class CsvReadStrategy extends AbstractReadStrategy {
}
}
+ private CSVFormat getCSVFormat() {
+ String quoteChar = readonlyConfig.get(FileBaseSourceOptions.QUOTE_CHAR);
+ String escapeChar = readonlyConfig.get(FileBaseSourceOptions.ESCAPE_CHAR);
+ Builder builder =
+ CSVFormat.EXCEL.builder().setIgnoreEmptyLines(true).setDelimiter(getDelimiter());
+ if (StringUtils.isNotEmpty(quoteChar)) {
+ builder.setQuote(quoteChar.charAt(0));
+ }
+ if (StringUtils.isNotEmpty(escapeChar)) {
+ builder.setEscape(escapeChar.charAt(0));
+ }
+ return builder.build();
+ }
+
private List<String> getHeaders(CSVParser csvParser) {
List<String> headers;
if (firstLineAsHeader) {
@@ -217,7 +231,6 @@ public class CsvReadStrategy extends AbstractReadStrategy {
"When reading csv files, if user has not specified schema
information, "
+ "SeaTunnel will not support column projection");
}
- ReadonlyConfig readonlyConfig = ReadonlyConfig.fromConfig(pluginConfig);
CsvDeserializationSchema.Builder builder =
CsvDeserializationSchema.builder()
.delimiter(getDelimiter())
@@ -236,7 +249,6 @@ public class CsvReadStrategy extends AbstractReadStrategy {
}
private String getDelimiter() {
- ReadonlyConfig readonlyConfig = ReadonlyConfig.fromConfig(pluginConfig);
return readonlyConfig.getOptional(FileBaseSourceOptions.FIELD_DELIMITER).orElse(",");
}
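For reference, a minimal sketch of the CSVFormat that getCSVFormat() builds when quote_char is a backtick and escape_char is `"` — the same combination exercised by the new test and fixture below. The SpecialQuoteDemo wrapper and the inline sample line are illustrative:

```java
import java.io.StringReader;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class SpecialQuoteDemo {
    public static void main(String[] args) throws Exception {
        // Mirrors getCSVFormat() with quote_char = "`" and escape_char = "\"".
        CSVFormat format =
                CSVFormat.EXCEL.builder()
                        .setIgnoreEmptyLines(true)
                        .setDelimiter(',')
                        .setQuote('`')
                        .setEscape('"')
                        .build();
        // One line from the test resource: the '"' escapes the inner backtick,
        // so it stays inside the field instead of closing it.
        String csv = "22,`Rose\"`Wang`,16\n";
        try (CSVParser parser = CSVParser.parse(new StringReader(csv), format)) {
            for (CSVRecord record : parser) {
                System.out.println(record.get(1)); // Rose`Wang
            }
        }
    }
}
```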
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/CsvReadStrategyTest.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/CsvReadStrategyTest.java
index 0aa0189481..78574fc14a 100644
--- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/CsvReadStrategyTest.java
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/CsvReadStrategyTest.java
@@ -25,6 +25,7 @@ import org.apache.seatunnel.api.table.type.BasicType;
import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.connectors.seatunnel.file.config.FileBaseSourceOptions;
import org.apache.seatunnel.connectors.seatunnel.file.config.HadoopConf;
import org.junit.jupiter.api.Assertions;
@@ -35,7 +36,9 @@ import lombok.extern.slf4j.Slf4j;
import java.net.URL;
import java.nio.file.Paths;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_DEFAULT;
@@ -102,6 +105,53 @@ public class CsvReadStrategyTest {
Assertions.assertEquals(100, testCollector.getRows().get(1).getField(2));
}
+ @Test
+ public void testSpecialQuoteCharForCsvRead() throws Exception {
+ URL resource =
+ CsvReadStrategyTest.class.getResource("/csv/special_quote_char_break_line.csv");
+ String path = Paths.get(resource.toURI()).toString();
+ CsvReadStrategy csvReadStrategy = new CsvReadStrategy();
+ LocalConf localConf = new LocalConf(FS_DEFAULT_NAME_DEFAULT);
+ csvReadStrategy.init(localConf);
+ csvReadStrategy.getFileNamesByPath(path);
+ csvReadStrategy.setPluginConfig(ConfigFactory.parseMap(getOptionsForSpecialQuoteChar()));
+ csvReadStrategy.setCatalogTable(
+ CatalogTableUtil.getCatalogTable(
+ "test",
+ new SeaTunnelRowType(
+ new String[] {"id", "name", "age"},
+ new SeaTunnelDataType[] {
+ BasicType.INT_TYPE, BasicType.STRING_TYPE, BasicType.INT_TYPE
+ })));
+ TestCollector testCollector = new TestCollector();
+ csvReadStrategy.read(path, "", testCollector);
+ final List<SeaTunnelRow> rows = testCollector.getRows();
+ Assertions.assertEquals(4, rows.size());
+ if (isWindows()) {
+ Assertions.assertEquals("harry\r\n potter",
rows.get(0).getField(1));
+ } else {
+ Assertions.assertEquals("harry\n potter", rows.get(0).getField(1));
+ }
+ Assertions.assertEquals("tom", rows.get(1).getField(1));
+ Assertions.assertEquals("Rose`Wang", rows.get(2).getField(1));
+ if (isWindows()) {
+ Assertions.assertEquals("Jock\r\nLi`Li", rows.get(3).getField(1));
+ } else {
+ Assertions.assertEquals("Jock\nLi`Li", rows.get(3).getField(1));
+ }
+ }
+
+ private boolean isWindows() {
+ return System.getProperty("os.name").toLowerCase().contains("win");
+ }
+
+ private Map<String, Object> getOptionsForSpecialQuoteChar() {
+ Map<String, Object> map = new HashMap<>();
+ map.put(FileBaseSourceOptions.QUOTE_CHAR.key(), "`");
+ map.put(FileBaseSourceOptions.ESCAPE_CHAR.key(), "\"");
+ return map;
+ }
+
public static class TestCollector implements Collector<SeaTunnelRow> {
private final List<SeaTunnelRow> rows = new ArrayList<>();
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/csv/special_quote_char_break_line.csv b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/csv/special_quote_char_break_line.csv
new file mode 100644
index 0000000000..c4748fe719
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/test/resources/csv/special_quote_char_break_line.csv
@@ -0,0 +1,6 @@
+20,`harry
+ potter`,18
+21,`tom`,19
+22,`Rose"`Wang`,16
+23,`Jock
+Li"`Li`,17
\ No newline at end of file
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/source/CosFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/source/CosFileSourceFactory.java
index 0fff40bc25..34fe47c28a 100644
--- a/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/source/CosFileSourceFactory.java
+++ b/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/source/CosFileSourceFactory.java
@@ -91,6 +91,8 @@ public class CosFileSourceFactory implements TableSourceFactory {
.optional(FileBaseSourceOptions.NULL_FORMAT)
.optional(FileBaseSourceOptions.FILENAME_EXTENSION)
.optional(FileBaseSourceOptions.READ_COLUMNS)
+ .optional(FileBaseSourceOptions.QUOTE_CHAR)
+ .optional(FileBaseSourceOptions.ESCAPE_CHAR)
.build();
}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/source/FtpFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/source/FtpFileSourceFactory.java
index 3dfcbadc77..02df6d2989 100644
--- a/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/source/FtpFileSourceFactory.java
+++ b/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/source/FtpFileSourceFactory.java
@@ -99,6 +99,8 @@ public class FtpFileSourceFactory implements TableSourceFactory {
.optional(FileBaseSourceOptions.READ_COLUMNS)
.optional(FtpFileSourceOptions.FTP_REMOTE_VERIFICATION_ENABLED)
.optional(FtpFileSourceOptions.FTP_CONTROL_ENCODING)
+ .optional(FileBaseSourceOptions.QUOTE_CHAR)
+ .optional(FileBaseSourceOptions.ESCAPE_CHAR)
.build();
}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/source/HdfsFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/source/HdfsFileSourceFactory.java
index f7d343414a..7e0de70c6e 100644
--- a/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/source/HdfsFileSourceFactory.java
+++ b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/source/HdfsFileSourceFactory.java
@@ -99,6 +99,8 @@ public class HdfsFileSourceFactory implements TableSourceFactory {
.optional(FileBaseSourceOptions.KERBEROS_KEYTAB_PATH)
.optional(FileBaseSourceOptions.KRB5_PATH)
.optional(FileBaseSourceOptions.REMOTE_USER)
+ .optional(FileBaseSourceOptions.QUOTE_CHAR)
+ .optional(FileBaseSourceOptions.ESCAPE_CHAR)
.build();
}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/jindo/source/OssFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/jindo/source/OssFileSourceFactory.java
index 1076a0688b..de37c42014 100644
--- a/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/jindo/source/OssFileSourceFactory.java
+++ b/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/jindo/source/OssFileSourceFactory.java
@@ -86,6 +86,8 @@ public class OssFileSourceFactory implements TableSourceFactory {
.optional(FileBaseSourceOptions.NULL_FORMAT)
.optional(FileBaseSourceOptions.FILENAME_EXTENSION)
.optional(FileBaseSourceOptions.READ_COLUMNS)
+ .optional(FileBaseSourceOptions.QUOTE_CHAR)
+ .optional(FileBaseSourceOptions.ESCAPE_CHAR)
.build();
}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/source/LocalFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/source/LocalFileSourceFactory.java
index e6e0eabe07..9672f2cd80 100644
--- a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/source/LocalFileSourceFactory.java
+++ b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/source/LocalFileSourceFactory.java
@@ -102,6 +102,8 @@ public class LocalFileSourceFactory implements TableSourceFactory {
.optional(FileBaseSourceOptions.NULL_FORMAT)
.optional(FileBaseSourceOptions.FILENAME_EXTENSION)
.optional(FileBaseSourceOptions.READ_COLUMNS)
+ .optional(FileBaseSourceOptions.QUOTE_CHAR)
+ .optional(FileBaseSourceOptions.ESCAPE_CHAR)
.build();
}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-obs/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/obs/source/ObsFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-obs/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/obs/source/ObsFileSourceFactory.java
index 7d8c5d4da4..0a4be5e038 100644
--- a/seatunnel-connectors-v2/connector-file/connector-file-obs/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/obs/source/ObsFileSourceFactory.java
+++ b/seatunnel-connectors-v2/connector-file/connector-file-obs/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/obs/source/ObsFileSourceFactory.java
@@ -75,6 +75,8 @@ public class ObsFileSourceFactory implements TableSourceFactory {
.optional(FileBaseSourceOptions.NULL_FORMAT)
.optional(FileBaseSourceOptions.FILENAME_EXTENSION)
.optional(FileBaseSourceOptions.READ_COLUMNS)
+ .optional(FileBaseSourceOptions.QUOTE_CHAR)
+ .optional(FileBaseSourceOptions.ESCAPE_CHAR)
.build();
}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java
index deb22190f9..cc7bedd528 100644
--- a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java
+++ b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java
@@ -97,6 +97,8 @@ public class OssFileSourceFactory implements TableSourceFactory {
.optional(FileBaseSourceOptions.NULL_FORMAT)
.optional(FileBaseSourceOptions.FILENAME_EXTENSION)
.optional(FileBaseSourceOptions.READ_COLUMNS)
+ .optional(FileBaseSourceOptions.QUOTE_CHAR)
+ .optional(FileBaseSourceOptions.ESCAPE_CHAR)
.build();
}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSourceFactory.java
index a3906b5e2a..96d0add798 100644
--- a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSourceFactory.java
+++ b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSourceFactory.java
@@ -101,6 +101,8 @@ public class S3FileSourceFactory implements TableSourceFactory {
.optional(FileBaseSourceOptions.NULL_FORMAT)
.optional(FileBaseSourceOptions.FILENAME_EXTENSION)
.optional(FileBaseSourceOptions.READ_COLUMNS)
+ .optional(FileBaseSourceOptions.QUOTE_CHAR)
+ .optional(FileBaseSourceOptions.ESCAPE_CHAR)
.build();
}
diff --git a/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/source/SftpFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/source/SftpFileSourceFactory.java
index 576ee90f60..6ed53d3bed 100644
--- a/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/source/SftpFileSourceFactory.java
+++ b/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/source/SftpFileSourceFactory.java
@@ -88,6 +88,8 @@ public class SftpFileSourceFactory implements TableSourceFactory {
.optional(FileBaseSourceOptions.NULL_FORMAT)
.optional(FileBaseSourceOptions.FILENAME_EXTENSION)
.optional(FileBaseSourceOptions.READ_COLUMNS)
+ .optional(FileBaseSourceOptions.QUOTE_CHAR)
+ .optional(FileBaseSourceOptions.ESCAPE_CHAR)
.build();
}