This is an automated email from the ASF dual-hosted git repository.

exceptionfactory pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi.git


The following commit(s) were added to refs/heads/main by this push:
     new 20c815dc12 NIFI-13304 Added SplitExcel Processor
20c815dc12 is described below

commit 20c815dc12b6db6c98af7364a379523270a1023b
Author: dan-s1 <dsti...@gmail.com>
AuthorDate: Fri Jun 14 18:55:49 2024 +0000

    NIFI-13304 Added SplitExcel Processor
    
    This closes #8981
    
    Signed-off-by: David Handermann <exceptionfact...@apache.org>
---
 .../nifi-poi-bundle/nifi-poi-nar/pom.xml           |   3 -
 .../nifi-poi-bundle/nifi-poi-services/pom.xml      |   8 +-
 .../java/org/apache/nifi/excel/ExcelReader.java    |  29 ---
 .../apache/nifi/processors/excel/SplitExcel.java   | 221 +++++++++++++++++++++
 .../services/org.apache.nifi.processor.Processor   |  16 ++
 .../nifi/processors/excel/TestSplitExcel.java      | 148 ++++++++++++++
 .../resources/excel/dataWithSharedFormula.xlsx     | Bin 0 -> 16865 bytes
 .../test/resources/excel/sheetsWithEmptySheet.xlsx | Bin 0 -> 7445 bytes
 .../{nifi-poi-nar => nifi-poi-utils}/pom.xml       |  26 +--
 .../java/org/apache/nifi/excel/ProtectionType.java |  47 +++++
 nifi-extension-bundles/nifi-poi-bundle/pom.xml     |   1 +
 11 files changed, 443 insertions(+), 56 deletions(-)

diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml
index 075dc77336..9db159a85f 100644
--- a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml
+++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml
@@ -21,11 +21,8 @@
         <artifactId>nifi-poi-bundle</artifactId>
         <version>2.0.0-SNAPSHOT</version>
     </parent>
-
     <artifactId>nifi-poi-nar</artifactId>
     <packaging>nar</packaging>
-
-
     <dependencies>
         <dependency>
             <groupId>org.apache.nifi</groupId>
diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/pom.xml 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/pom.xml
index aa640c0e26..5fe26e6863 100644
--- a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/pom.xml
+++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/pom.xml
@@ -30,10 +30,12 @@
                     <excludes combine.children="append">
                         
<exclude>src/test/resources/excel/collegeScorecard.xlsx</exclude>
                         
<exclude>src/test/resources/excel/dataformatting.xlsx</exclude>
+                        
<exclude>src/test/resources/excel/dataWithSharedFormula.xlsx</exclude>
                         <exclude>src/test/resources/excel/dates.xlsx</exclude>
                         
<exclude>src/test/resources/excel/notExcel.txt</exclude>
                         
<exclude>src/test/resources/excel/numbers.xlsx</exclude>
                         
<exclude>src/test/resources/excel/olderFormat.xls</exclude>
+                        
<exclude>src/test/resources/excel/sheetsWithEmptySheet.xlsx</exclude>
                         
<exclude>src/test/resources/excel/simpleDataFormatting.xlsx</exclude>
                         
<exclude>src/test/resources/excel/twoSheets.xlsx</exclude>
                     </excludes>
@@ -63,7 +65,6 @@
             <groupId>org.apache.logging.log4j</groupId>
             <artifactId>log4j-to-slf4j</artifactId>
         </dependency>
-
         <dependency>
             <groupId>org.apache.nifi</groupId>
             <artifactId>nifi-record</artifactId>
@@ -82,8 +83,9 @@
             <version>2.0.0-SNAPSHOT</version>
         </dependency>
         <dependency>
-            <groupId>org.apache.commons</groupId>
-            <artifactId>commons-lang3</artifactId>
+            <groupId>org.apache.nifi</groupId>
+            <artifactId>nifi-poi-utils</artifactId>
+            <version>2.0.0-SNAPSHOT</version>
         </dependency>
     </dependencies>
 </project>
\ No newline at end of file
diff --git 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
index 24d47f3a75..09c7f647f1 100644
--- 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
+++ 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
@@ -21,7 +21,6 @@ import 
org.apache.nifi.annotation.documentation.CapabilityDescription;
 import org.apache.nifi.annotation.documentation.Tags;
 import org.apache.nifi.annotation.lifecycle.OnEnabled;
 import org.apache.nifi.components.AllowableValue;
-import org.apache.nifi.components.DescribedValue;
 import org.apache.nifi.components.PropertyDescriptor;
 import org.apache.nifi.context.PropertyContext;
 import org.apache.nifi.controller.ConfigurationContext;
@@ -62,34 +61,6 @@ import java.util.Map;
         + "(XSSF 2007 OOXML file format) Excel documents and not older .xls 
(HSSF '97(-2007) file format) documents.")
 public class ExcelReader extends SchemaRegistryService implements 
RecordReaderFactory {
 
-    public enum ProtectionType implements DescribedValue {
-        UNPROTECTED("Unprotected", "An Excel spreadsheet not protected by a 
password"),
-        PASSWORD("Password Protected", "An Excel spreadsheet protected by a 
password");
-
-        ProtectionType(String displayName, String description) {
-            this.displayName = displayName;
-            this.description = description;
-        }
-
-        private final String displayName;
-        private final String description;
-
-        @Override
-        public String getValue() {
-            return name();
-        }
-
-        @Override
-        public String getDisplayName() {
-            return displayName;
-        }
-
-        @Override
-        public String getDescription() {
-            return description;
-        }
-    }
-
     public static final PropertyDescriptor REQUIRED_SHEETS = new 
PropertyDescriptor
             .Builder().name("Required Sheets")
             .displayName("Required Sheets")
diff --git 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/processors/excel/SplitExcel.java
 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/processors/excel/SplitExcel.java
new file mode 100644
index 0000000000..86812f2e4f
--- /dev/null
+++ 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/processors/excel/SplitExcel.java
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.excel;
+
+import com.github.pjfanning.xlsx.StreamingReader;
+import com.github.pjfanning.xlsx.exceptions.ExcelRuntimeException;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.SideEffectFree;
+import org.apache.nifi.annotation.behavior.SupportsBatching;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.excel.ProtectionType;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.flowfile.attributes.CoreAttributes;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.poi.ss.usermodel.CellCopyPolicy;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.UUID;
+
+import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_COUNT;
+import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_ID;
+import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_INDEX;
+import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.SEGMENT_ORIGINAL_FILENAME;
+
+@SideEffectFree
+@SupportsBatching
+@Tags({"split", "text"})
+@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
+@CapabilityDescription("Splits a multi sheet Microsoft Excel spreadsheet into 
multiple Microsoft Excel spreadsheets where each sheet from the original" +
+        " file is converted to an individual spreadsheet in its own flow file. 
 This processor is currently only capable of processing .xlsx "
+        + "(XSSF 2007 OOXML file format) Excel documents and not older .xls 
(HSSF '97(-2007) file format) documents.")
+@WritesAttributes({
+        @WritesAttribute(attribute = "fragment.identifier", description = "All 
split Excel FlowFiles produced from the same parent Excel FlowFile will have 
the same randomly generated UUID added" +
+                " for this attribute"),
+        @WritesAttribute(attribute = "fragment.index", description = "A one-up 
number that indicates the ordering of the split Excel FlowFiles that were 
created from a single parent Excel FlowFile"),
+        @WritesAttribute(attribute = "fragment.count", description = "The 
number of split Excel FlowFiles generated from the parent Excel FlowFile"),
+        @WritesAttribute(attribute = "segment.original.filename", description 
= "The filename of the parent Excel FlowFile"),
+        @WritesAttribute(attribute = SplitExcel.SHEET_NAME, description = "The 
name of the Excel sheet from the original spreadsheet."),
+        @WritesAttribute(attribute = SplitExcel.TOTAL_ROWS, description = "The 
number of rows in the Excel sheet from the original spreadsheet.")})
+public class SplitExcel extends AbstractProcessor {
+    public static final String SHEET_NAME = "sheetname";
+    public static final String TOTAL_ROWS = "total.rows";
+
+    public static final PropertyDescriptor PROTECTION_TYPE = new 
PropertyDescriptor.Builder()
+            .name("Protection Type")
+            .description("Specifies whether an Excel spreadsheet is protected 
by a password or not.")
+            .required(true)
+            .allowableValues(ProtectionType.class)
+            .defaultValue(ProtectionType.UNPROTECTED)
+            .build();
+
+    public static final PropertyDescriptor PASSWORD = new 
PropertyDescriptor.Builder()
+            .name("Password")
+            .description("The password for a password protected Excel 
spreadsheet")
+            .required(true)
+            .sensitive(true)
+            .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+            .dependsOn(PROTECTION_TYPE, ProtectionType.PASSWORD)
+            .build();
+
+    public static final Relationship REL_ORIGINAL = new Relationship.Builder()
+            .name("original")
+            .description("The original FlowFile that was split into segments. 
If the FlowFile fails processing, nothing will be sent to this relationship")
+            .build();
+
+    public static final Relationship REL_FAILURE = new Relationship.Builder()
+            .name("failure")
+            .description("If a FlowFile cannot be transformed from the 
configured input format to the configured output format, the unchanged FlowFile 
will be routed to this relationship.")
+            .build();
+
+    public static final Relationship REL_SPLIT = new Relationship.Builder()
+            .name("split")
+            .description("The individual Excel 'segments' of the original 
Excel FlowFile will be routed to this relationship.")
+            .build();
+
+    private static final List<PropertyDescriptor> DESCRIPTORS = 
List.of(PROTECTION_TYPE, PASSWORD);
+    private static final Set<Relationship> RELATIONSHIPS = 
Set.of(REL_ORIGINAL, REL_FAILURE, REL_SPLIT);
+    private static final CellCopyPolicy CELL_COPY_POLICY = new 
CellCopyPolicy.Builder()
+            .cellFormula(CellCopyPolicy.DEFAULT_COPY_CELL_FORMULA_POLICY)
+            .cellStyle(CellCopyPolicy.DEFAULT_COPY_CELL_STYLE_POLICY)
+            .cellValue(CellCopyPolicy.DEFAULT_COPY_CELL_VALUE_POLICY)
+            .condenseRows(CellCopyPolicy.DEFAULT_CONDENSE_ROWS_POLICY)
+            .copyHyperlink(CellCopyPolicy.DEFAULT_COPY_HYPERLINK_POLICY)
+            .mergeHyperlink(CellCopyPolicy.DEFAULT_MERGE_HYPERLINK_POLICY)
+            .mergedRegions(CellCopyPolicy.DEFAULT_COPY_MERGED_REGIONS_POLICY)
+            .rowHeight(CellCopyPolicy.DEFAULT_COPY_ROW_HEIGHT_POLICY)
+            .build();
+
+    @Override
+    public Set<Relationship> getRelationships() {
+        return RELATIONSHIPS;
+    }
+
+    @Override
+    public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+        return DESCRIPTORS;
+    }
+
+    @Override
+    public void onTrigger(ProcessContext context, ProcessSession session) 
throws ProcessException {
+        FlowFile originalFlowFile = session.get();
+        if (originalFlowFile == null) {
+            return;
+        }
+
+        final String password = context.getProperty(PASSWORD).getValue();
+        final List<WorkbookSplit> workbookSplits = new ArrayList<>();
+
+        try {
+            session.read(originalFlowFile, in -> {
+
+                final Workbook originalWorkbook = StreamingReader.builder()
+                        .rowCacheSize(100)
+                        .bufferSize(4096)
+                        .password(password)
+                        .setReadHyperlinks(true) // NOTE: Needed for copying 
rows.
+                        .setReadSharedFormulas(true) // NOTE: If not set to 
true, then data with shared formulas fail.
+                        .open(in);
+
+                int index = 0;
+                for (final Sheet originalSheet : originalWorkbook) {
+                    final String originalSheetName = 
originalSheet.getSheetName();
+                    try (XSSFWorkbook newWorkbook = new XSSFWorkbook()) {
+                        XSSFSheet newSheet = 
newWorkbook.createSheet(originalSheetName);
+                        List<Row> originalRows = new ArrayList<>();
+                        for (Row originalRow : originalSheet) {
+                            originalRows.add(originalRow);
+                        }
+
+                        if (!originalRows.isEmpty()) {
+                            newSheet.copyRows(originalRows, 
originalSheet.getFirstRowNum(), CELL_COPY_POLICY);
+                        }
+
+                        FlowFile newFlowFile = 
session.create(originalFlowFile);
+                        try (final OutputStream out = 
session.write(newFlowFile)) {
+                            newWorkbook.write(out);
+                            workbookSplits.add(new WorkbookSplit(index, 
newFlowFile, originalSheetName, originalRows.size()));
+                        }
+                    }
+
+                    index++;
+                }
+            });
+        } catch (ExcelRuntimeException | ProcessException e) {
+            getLogger().error("Failed to split {}", originalFlowFile, e);
+            session.remove(workbookSplits.stream()
+                    .map(WorkbookSplit::content)
+                    .toList());
+            workbookSplits.clear();
+            session.transfer(originalFlowFile, REL_FAILURE);
+            return;
+        }
+
+        final String fragmentId = UUID.randomUUID().toString();
+        final String originalFileName = 
originalFlowFile.getAttribute(CoreAttributes.FILENAME.key());
+        final int extensionIndex = originalFileName.lastIndexOf(".");
+        String originalFileNameWithoutExtension = originalFileName;
+        String originalFileNameExtension = "";
+
+        if (extensionIndex > -1) {
+            originalFileNameWithoutExtension = originalFileName.substring(0, 
extensionIndex);
+            originalFileNameExtension = 
originalFileName.substring(extensionIndex);
+        }
+
+        final Map<String, String> attributes = new HashMap<>();
+        attributes.put(FRAGMENT_COUNT.key(), 
String.valueOf(workbookSplits.size()));
+        attributes.put(FRAGMENT_ID.key(), fragmentId);
+        attributes.put(SEGMENT_ORIGINAL_FILENAME.key(), originalFileName);
+
+        for (WorkbookSplit split : workbookSplits) {
+            attributes.put(CoreAttributes.FILENAME.key(), 
String.format("%s-%s%s", originalFileNameWithoutExtension, split.index(), 
originalFileNameExtension));
+            attributes.put(FRAGMENT_INDEX.key(), 
Integer.toString(split.index()));
+            attributes.put(SHEET_NAME, split.sheetName());
+            attributes.put(TOTAL_ROWS, Integer.toString(split.numRows()));
+            session.putAllAttributes(split.content(), attributes);
+        }
+
+        session.transfer(originalFlowFile, REL_ORIGINAL);
+        final List<FlowFile> flowFileSplits = workbookSplits.stream()
+                .map(WorkbookSplit::content)
+                .toList();
+
+        session.transfer(flowFileSplits, REL_SPLIT);
+    }
+
+    private record WorkbookSplit(int index, FlowFile content, String 
sheetName, int numRows) {
+    }
+}
diff --git 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
new file mode 100644
index 0000000000..ab2dc1b73a
--- /dev/null
+++ 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.nifi.processors.excel.SplitExcel
diff --git 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/processors/excel/TestSplitExcel.java
 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/processors/excel/TestSplitExcel.java
new file mode 100644
index 0000000000..42a1e18e1a
--- /dev/null
+++ 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/processors/excel/TestSplitExcel.java
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.excel;
+
+import org.apache.nifi.util.MockFlowFile;
+import org.apache.nifi.util.TestRunner;
+import org.apache.nifi.util.TestRunners;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+
+import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_COUNT;
+import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_ID;
+import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_INDEX;
+import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.SEGMENT_ORIGINAL_FILENAME;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+public class TestSplitExcel {
+    private TestRunner runner;
+
+    @BeforeAll
+    static void setUpBeforeAll() throws Exception {
+        final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+        try (XSSFWorkbook workbook = new XSSFWorkbook()) {
+            workbook.createSheet("User Info");
+            workbook.createSheet("Vehicle Info");
+            workbook.write(outputStream);
+        }
+    }
+
+    @BeforeEach
+    void setUp() {
+        runner = TestRunners.newTestRunner(SplitExcel.class);
+    }
+
+    @Test
+    void testSingleSheet() throws IOException {
+        Path singleSheet = Paths.get("src/test/resources/excel/dates.xlsx");
+        runner.enqueue(singleSheet);
+
+        runner.run();
+
+        runner.assertTransferCount(SplitExcel.REL_SPLIT, 1);
+        runner.assertTransferCount(SplitExcel.REL_ORIGINAL, 1);
+        runner.assertTransferCount(SplitExcel.REL_FAILURE, 0);
+    }
+
+    @Test
+    void testMultisheet() throws IOException {
+        Path multisheet = Paths.get("src/test/resources/excel/twoSheets.xlsx");
+        String fileName = multisheet.toFile().getName();
+        runner.enqueue(multisheet);
+
+        runner.run();
+
+        runner.assertTransferCount(SplitExcel.REL_SPLIT, 2);
+        runner.assertTransferCount(SplitExcel.REL_ORIGINAL, 1);
+        runner.assertTransferCount(SplitExcel.REL_FAILURE, 0);
+
+        List<MockFlowFile> flowFiles = 
runner.getFlowFilesForRelationship(SplitExcel.REL_SPLIT);
+        String expectedSheetNamesPrefix = "TestSheet";
+        List<String> expectedSheetSuffixes = List.of("A", "B");
+        List<Integer> expectedTotalRows = List.of(4, 3);
+
+        for (int index = 0; index < flowFiles.size(); index++) {
+            MockFlowFile flowFile = flowFiles.get(index);
+            assertNotNull(flowFile.getAttribute(FRAGMENT_ID.key()));
+            assertEquals(Integer.toString(index), 
flowFile.getAttribute(FRAGMENT_INDEX.key()));
+            assertEquals(Integer.toString(flowFiles.size()), 
flowFile.getAttribute(FRAGMENT_COUNT.key()));
+            assertEquals(fileName, 
flowFile.getAttribute(SEGMENT_ORIGINAL_FILENAME.key()));
+            assertEquals(expectedSheetNamesPrefix + 
expectedSheetSuffixes.get(index), flowFile.getAttribute(SplitExcel.SHEET_NAME));
+            assertEquals(expectedTotalRows.get(index).toString(), 
flowFile.getAttribute(SplitExcel.TOTAL_ROWS));
+        }
+    }
+
+    @Test
+    void testNonExcel() throws IOException {
+        Path nonExcel = Paths.get("src/test/resources/excel/notExcel.txt");
+        runner.enqueue(nonExcel);
+
+        runner.run();
+
+        runner.assertTransferCount(SplitExcel.REL_SPLIT, 0);
+        runner.assertTransferCount(SplitExcel.REL_ORIGINAL, 0);
+        runner.assertTransferCount(SplitExcel.REL_FAILURE, 1);
+    }
+
+    @Test
+    void testWithEmptySheet() throws IOException {
+        Path sheetsWithEmptySheet = 
Paths.get("src/test/resources/excel/sheetsWithEmptySheet.xlsx");
+        String fileName = sheetsWithEmptySheet.toFile().getName();
+        runner.enqueue(sheetsWithEmptySheet);
+
+        runner.run();
+
+        runner.assertTransferCount(SplitExcel.REL_SPLIT, 3);
+        runner.assertTransferCount(SplitExcel.REL_ORIGINAL, 1);
+        runner.assertTransferCount(SplitExcel.REL_FAILURE, 0);
+
+        List<MockFlowFile> flowFiles = 
runner.getFlowFilesForRelationship(SplitExcel.REL_SPLIT);
+        List<String> expectedSheetSuffixes = List.of("TestSheetA", 
"TestSheetB", "emptySheet");
+        List<Integer> expectedTotalRows = List.of(4, 3, 0);
+
+        for (int index = 0; index < flowFiles.size(); index++) {
+            MockFlowFile flowFile = flowFiles.get(index);
+            assertNotNull(flowFile.getAttribute(FRAGMENT_ID.key()));
+            assertEquals(Integer.toString(index), 
flowFile.getAttribute(FRAGMENT_INDEX.key()));
+            assertEquals(Integer.toString(flowFiles.size()), 
flowFile.getAttribute(FRAGMENT_COUNT.key()));
+            assertEquals(fileName, 
flowFile.getAttribute(SEGMENT_ORIGINAL_FILENAME.key()));
+            assertEquals(expectedSheetSuffixes.get(index), 
flowFile.getAttribute(SplitExcel.SHEET_NAME));
+            assertEquals(expectedTotalRows.get(index).toString(), 
flowFile.getAttribute(SplitExcel.TOTAL_ROWS));
+        }
+    }
+
+    @Test
+    void testDataWithSharedFormula() throws IOException {
+        Path dataWithSharedFormula = 
Paths.get("src/test/resources/excel/dataWithSharedFormula.xlsx");
+        runner.enqueue(dataWithSharedFormula);
+
+        runner.run();
+
+        runner.assertTransferCount(SplitExcel.REL_SPLIT, 2);
+        runner.assertTransferCount(SplitExcel.REL_ORIGINAL, 1);
+        runner.assertTransferCount(SplitExcel.REL_FAILURE, 0);
+    }
+}
diff --git 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/resources/excel/dataWithSharedFormula.xlsx
 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/resources/excel/dataWithSharedFormula.xlsx
new file mode 100644
index 0000000000..c399b640cb
Binary files /dev/null and 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/resources/excel/dataWithSharedFormula.xlsx
 differ
diff --git 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/resources/excel/sheetsWithEmptySheet.xlsx
 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/resources/excel/sheetsWithEmptySheet.xlsx
new file mode 100644
index 0000000000..beafa5f08c
Binary files /dev/null and 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/resources/excel/sheetsWithEmptySheet.xlsx
 differ
diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-utils/pom.xml
similarity index 57%
copy from nifi-extension-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml
copy to nifi-extension-bundles/nifi-poi-bundle/nifi-poi-utils/pom.xml
index 075dc77336..e4120939b2 100644
--- a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml
+++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-utils/pom.xml
@@ -13,30 +13,14 @@
   See the License for the specific language governing permissions and
   limitations under the License.
 -->
-<project xmlns="http://maven.apache.org/POM/4.0.0"; 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"; 
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
https://maven.apache.org/xsd/maven-4.0.0.xsd";>
+<project xmlns="http://maven.apache.org/POM/4.0.0";
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance";
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd";>
     <modelVersion>4.0.0</modelVersion>
-
     <parent>
         <groupId>org.apache.nifi</groupId>
         <artifactId>nifi-poi-bundle</artifactId>
         <version>2.0.0-SNAPSHOT</version>
     </parent>
-
-    <artifactId>nifi-poi-nar</artifactId>
-    <packaging>nar</packaging>
-
-
-    <dependencies>
-        <dependency>
-            <groupId>org.apache.nifi</groupId>
-            <artifactId>nifi-poi-services</artifactId>
-            <version>2.0.0-SNAPSHOT</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.nifi</groupId>
-            <artifactId>nifi-standard-shared-nar</artifactId>
-            <version>2.0.0-SNAPSHOT</version>
-            <type>nar</type>
-        </dependency>
-    </dependencies>
-</project>
+    <artifactId>nifi-poi-utils</artifactId>
+</project>
\ No newline at end of file
diff --git 
a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-utils/src/main/java/org/apache/nifi/excel/ProtectionType.java
 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-utils/src/main/java/org/apache/nifi/excel/ProtectionType.java
new file mode 100644
index 0000000000..92453cb2bc
--- /dev/null
+++ 
b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-utils/src/main/java/org/apache/nifi/excel/ProtectionType.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.excel;
+
+import org.apache.nifi.components.DescribedValue;
+
+public enum ProtectionType implements DescribedValue {
+    UNPROTECTED("Unprotected", "An Excel spreadsheet not protected by a 
password"),
+    PASSWORD("Password Protected", "An Excel spreadsheet protected by a 
password");
+
+    ProtectionType(String displayName, String description) {
+        this.displayName = displayName;
+        this.description = description;
+    }
+
+    private final String displayName;
+    private final String description;
+
+    @Override
+    public String getValue() {
+        return name();
+    }
+
+    @Override
+    public String getDisplayName() {
+        return displayName;
+    }
+
+    @Override
+    public String getDescription() {
+        return description;
+    }
+}
diff --git a/nifi-extension-bundles/nifi-poi-bundle/pom.xml 
b/nifi-extension-bundles/nifi-poi-bundle/pom.xml
index 93bd06d239..a8f57577e8 100644
--- a/nifi-extension-bundles/nifi-poi-bundle/pom.xml
+++ b/nifi-extension-bundles/nifi-poi-bundle/pom.xml
@@ -29,6 +29,7 @@
     <modules>
         <module>nifi-poi-nar</module>
         <module>nifi-poi-services</module>
+        <module>nifi-poi-utils</module>
     </modules>
     <dependencyManagement>
         <dependencies>

Reply via email to