This is an automated email from the ASF dual-hosted git repository.
shenghang pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/seatunnel.git
The following commit(s) were added to refs/heads/dev by this push:
new c4b96c2ba3 [Fix][Connector-V2] Solve the reading of special excel (#9882)
c4b96c2ba3 is described below
commit c4b96c2ba3af1835440bd5d4c8717d6e43fdb61d
Author: corgy-w <[email protected]>
AuthorDate: Sat Sep 27 20:18:18 2025 +0800
[Fix][Connector-V2] Solve the reading of special excel (#9882)
---
.../file/source/reader/ExcelReadStrategy.java | 12 +++-
.../e2e/connector/file/local/LocalFileIT.java | 6 ++
.../src/test/resources/excel/special_excel.xlsx | Bin 0 -> 12209 bytes
.../resources/excel/special_excel_to_assert.conf | 68 +++++++++++++++++++++
4 files changed, 83 insertions(+), 3 deletions(-)
diff --git
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java
index 35106f9866..57d373c981 100644
---
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java
+++
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java
@@ -159,13 +159,19 @@ public class ExcelReadStrategy extends
AbstractReadStrategy {
cellCount = seaTunnelRowType.getTotalFields();
cellCount = partitionsMap.isEmpty() ? cellCount : cellCount +
partitionsMap.size();
SeaTunnelDataType<?>[] fieldTypes =
seaTunnelRowType.getFieldTypes();
- int rowCount = sheet.getPhysicalNumberOfRows();
- if (skipHeaderNumber > rowCount) {
+ int firstRowNum = sheet.getFirstRowNum();
+ int lastRowNum = sheet.getLastRowNum();
+ if (firstRowNum == -1 || lastRowNum == -1) {
+ return;
+ }
+ // Calculate the actual start row considering skipHeaderNumber
+ int startRow = Math.max(firstRowNum + (int) skipHeaderNumber,
firstRowNum);
+ if (startRow > lastRowNum) {
throw new FileConnectorException(
CommonErrorCodeDeprecated.UNSUPPORTED_OPERATION,
"Skip the number of rows exceeds the maximum or
minimum limit of Sheet");
}
- IntStream.range((int) skipHeaderNumber, rowCount)
+ IntStream.range(startRow, lastRowNum + 1)
.mapToObj(sheet::getRow)
.filter(Objects::nonNull)
.forEach(
diff --git
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/local/LocalFileIT.java
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/local/LocalFileIT.java
index 7832c0ae1a..3bf8d89eb3 100644
---
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/local/LocalFileIT.java
+++
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/local/LocalFileIT.java
@@ -291,6 +291,11 @@ public class LocalFileIT extends TestSuiteBase {
"/seatunnel/read/excel_filter_regex/name=tyrantlucifer/hobby=coding/e2e_filter.xlsx",
container);
+ ContainerUtil.copyFileIntoContainers(
+ "/excel/special_excel.xlsx",
+ "/seatunnel/read/special_excel/special_excel.xlsx",
+ container);
+
ContainerUtil.copyFileIntoContainers(
"/csv/break_line.csv",
"/seatunnel/read/csv/break_line/break_line.csv",
@@ -323,6 +328,7 @@ public class LocalFileIT extends TestSuiteBase {
helper.execute("/excel/fake_to_local_excel.conf");
helper.execute("/excel/local_excel_to_assert.conf");
helper.execute("/excel/local_excel_projection_to_assert.conf");
+ helper.execute("/excel/special_excel_to_assert.conf");
// test write local text file
helper.execute("/text/fake_to_local_file_text.conf");
helper.execute("/text/local_file_text_lzo_to_assert.conf");
diff --git
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/excel/special_excel.xlsx
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/excel/special_excel.xlsx
new file mode 100644
index 0000000000..30dfc52057
Binary files /dev/null and b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/excel/special_excel.xlsx differ
diff --git
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/excel/special_excel_to_assert.conf
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/excel/special_excel_to_assert.conf
new file mode 100644
index 0000000000..37d3d6b0c4
--- /dev/null
+++
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/excel/special_excel_to_assert.conf
@@ -0,0 +1,68 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+ # You can set spark configuration here
+ spark.app.name = "SeaTunnel"
+ spark.executor.instances = 2
+ spark.executor.cores = 1
+ spark.executor.memory = "1g"
+ spark.master = local
+ job.mode = "BATCH"
+}
+
+source {
+ LocalFile {
+ path = "/seatunnel/read/special_excel"
+ plugin_output = "fake"
+ file_format_type = excel
+ sheet_name=Sheet1
+ field_delimiter = ;
+ skip_header_row_number = 0
+ schema = {
+ fields {
+ A = string
+ B = string
+ C = string
+ D = string
+ E = string
+ F = string
+ G = string
+ H = string
+ }
+ }
+ }
+}
+
+sink {
+ Assert {
+ rules {
+ row_rules = [
+ {
+ rule_type = MAX_ROW
+ rule_value = 11
+ },
+ {
+ rule_type = MIN_ROW
+ rule_value = 11
+ }
+ ]
+ }
+ }
+}