This is an automated email from the ASF dual-hosted git repository.

shenghang pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/seatunnel.git


The following commit(s) were added to refs/heads/dev by this push:
     new c4b96c2ba3 [Fix][Connector-V2] Solve the reading of special excel (#9882)
c4b96c2ba3 is described below

commit c4b96c2ba3af1835440bd5d4c8717d6e43fdb61d
Author: corgy-w <[email protected]>
AuthorDate: Sat Sep 27 20:18:18 2025 +0800

    [Fix][Connector-V2] Solve the reading of special excel (#9882)
---
 .../file/source/reader/ExcelReadStrategy.java      |  12 +++-
 .../e2e/connector/file/local/LocalFileIT.java      |   6 ++
 .../src/test/resources/excel/special_excel.xlsx    | Bin 0 -> 12209 bytes
 .../resources/excel/special_excel_to_assert.conf   |  68 +++++++++++++++++++++
 4 files changed, 83 insertions(+), 3 deletions(-)

diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java
index 35106f9866..57d373c981 100644
--- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java
+++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/ExcelReadStrategy.java
@@ -159,13 +159,19 @@ public class ExcelReadStrategy extends AbstractReadStrategy {
             cellCount = seaTunnelRowType.getTotalFields();
             cellCount = partitionsMap.isEmpty() ? cellCount : cellCount + partitionsMap.size();
             SeaTunnelDataType<?>[] fieldTypes = seaTunnelRowType.getFieldTypes();
-            int rowCount = sheet.getPhysicalNumberOfRows();
-            if (skipHeaderNumber > rowCount) {
+            int firstRowNum = sheet.getFirstRowNum();
+            int lastRowNum = sheet.getLastRowNum();
+            if (firstRowNum == -1 || lastRowNum == -1) {
+                return;
+            }
+            // Calculate the actual start row considering skipHeaderNumber
+            int startRow = Math.max(firstRowNum + (int) skipHeaderNumber, firstRowNum);
+            if (startRow > lastRowNum) {
                 throw new FileConnectorException(
                         CommonErrorCodeDeprecated.UNSUPPORTED_OPERATION,
                         "Skip the number of rows exceeds the maximum or minimum limit of Sheet");
             }
-            IntStream.range((int) skipHeaderNumber, rowCount)
+            IntStream.range(startRow, lastRowNum + 1)
                     .mapToObj(sheet::getRow)
                     .filter(Objects::nonNull)
                     .forEach(
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/local/LocalFileIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/local/LocalFileIT.java
index 7832c0ae1a..3bf8d89eb3 100644
--- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/local/LocalFileIT.java
+++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/local/LocalFileIT.java
@@ -291,6 +291,11 @@ public class LocalFileIT extends TestSuiteBase {
                         "/seatunnel/read/excel_filter_regex/name=tyrantlucifer/hobby=coding/e2e_filter.xlsx",
                         container);
 
+                ContainerUtil.copyFileIntoContainers(
+                        "/excel/special_excel.xlsx",
+                        "/seatunnel/read/special_excel/special_excel.xlsx",
+                        container);
+
                 ContainerUtil.copyFileIntoContainers(
                         "/csv/break_line.csv",
                         "/seatunnel/read/csv/break_line/break_line.csv",
@@ -323,6 +328,7 @@ public class LocalFileIT extends TestSuiteBase {
         helper.execute("/excel/fake_to_local_excel.conf");
         helper.execute("/excel/local_excel_to_assert.conf");
         helper.execute("/excel/local_excel_projection_to_assert.conf");
+        helper.execute("/excel/special_excel_to_assert.conf");
         // test write local text file
         helper.execute("/text/fake_to_local_file_text.conf");
         helper.execute("/text/local_file_text_lzo_to_assert.conf");
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/excel/special_excel.xlsx b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/excel/special_excel.xlsx
new file mode 100644
index 0000000000..30dfc52057
Binary files /dev/null and b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/excel/special_excel.xlsx differ
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/excel/special_excel_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/excel/special_excel_to_assert.conf
new file mode 100644
index 0000000000..37d3d6b0c4
--- /dev/null
+++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/excel/special_excel_to_assert.conf
@@ -0,0 +1,68 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+env {
+  parallelism = 1
+  job.mode = "BATCH"
+  # You can set spark configuration here
+  spark.app.name = "SeaTunnel"
+  spark.executor.instances = 2
+  spark.executor.cores = 1
+  spark.executor.memory = "1g"
+  spark.master = local
+  job.mode = "BATCH"
+}
+
+source {
+  LocalFile {
+    path = "/seatunnel/read/special_excel"
+    plugin_output = "fake"
+    file_format_type = excel
+    sheet_name=Sheet1
+    field_delimiter = ;
+    skip_header_row_number = 0
+    schema = {
+      fields {
+        A =  string
+        B =  string
+        C =  string
+        D =  string
+        E =  string
+        F =  string
+        G =  string
+        H =  string
+      }
+    }
+  }
+}
+
+sink {
+  Assert {
+    rules {
+      row_rules = [
+        {
+          rule_type = MAX_ROW
+          rule_value = 11
+        },
+        {
+          rule_type = MIN_ROW
+          rule_value = 11
+        }
+      ]
+    }
+  }
+}

Reply via email to