dan-s1 commented on code in PR #7016:
URL: https://github.com/apache/nifi/pull/7016#discussion_r1133945660


##########
nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java:
##########
@@ -299,55 +225,62 @@ public void process(InputStream inputStream) throws 
IOException {
         }
     }
 
+    private List<Integer> getColumnsToSkip(final ProcessContext context, 
FlowFile flowFile) {
+        final String[] columnsToSkip = 
StringUtils.split(context.getProperty(COLUMNS_TO_SKIP)
+                .evaluateAttributeExpressions(flowFile).getValue(), ",");
+
+        if (columnsToSkip != null) {
+            try {
+                return Arrays.stream(columnsToSkip)
+                        .map(columnToSkip -> Integer.parseInt(columnToSkip) - 
1)
+                        .collect(Collectors.toList());
+            } catch (NumberFormatException e) {
+                throw new ProcessException("Invalid column in Columns to Skip 
list.", e);
+            }
+        }
+
+        return new ArrayList<>();
+    }
+
+    private Map<String, Boolean> getDesiredSheets(final ProcessContext 
context, FlowFile flowFile) {
+        final String desiredSheetsDelimited = 
context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions(flowFile).getValue();
+        if (desiredSheetsDelimited != null) {
+            String[] desiredSheets = StringUtils.split(desiredSheetsDelimited, 
DESIRED_SHEETS_DELIMITER);
+            if(desiredSheets != null) {
+                return Arrays.stream(desiredSheets)
+                        .collect(Collectors.toMap(key -> key, value -> 
Boolean.FALSE));
+            } else {
+                getLogger().debug("Excel document was parsed but no sheets 
with the specified desired names were found.");
+            }
+        }
+
+        return new HashMap<>();
+    }
 
     /**
      * Handles an individual Excel sheet from the entire Excel document. Each 
sheet will result in an individual flowfile.
      *
-     * @param session
-     *  The NiFi ProcessSession instance for the current invocation.
+     * @param session The NiFi ProcessSession instance for the current 
invocation.
      */
-    private void handleExcelSheet(ProcessSession session, FlowFile 
originalParentFF, final InputStream sheetInputStream, ExcelSheetReadConfig 
readConfig,
-                                  CSVFormat csvFormat) throws IOException {
+    private void handleExcelSheet(ProcessSession session, FlowFile 
originalParentFF, final Sheet sheet, ExcelSheetReadConfig readConfig,
+                                  CSVFormat csvFormat) {
 
         FlowFile ff = session.create(originalParentFF);
+        final SheetToCSV sheetHandler = new SheetToCSV(readConfig, csvFormat);
         try {
-            final DataFormatter formatter = new DataFormatter();
-            final InputSource sheetSource = new InputSource(sheetInputStream);
-
-            final SheetToCSV sheetHandler = new SheetToCSV(readConfig, 
csvFormat);
-
-            final XMLReader parser = SAXHelper.newXMLReader();
-
-            //If Value Formatting is set to false then don't pass in the 
styles table.
-            // This will cause the XSSF Handler to return the raw value 
instead of the formatted one.
-            final StylesTable sst = 
readConfig.getFormatValues()?readConfig.getStyles():null;
-
-            final XSSFSheetXMLHandler handler = new XSSFSheetXMLHandler(
-                    sst, null, readConfig.getSharedStringsTable(), 
sheetHandler, formatter, false);
-
-            parser.setContentHandler(handler);
-
-            ff = session.write(ff, new OutputStreamCallback() {
-                @Override
-                public void process(OutputStream out) throws IOException {
-                    PrintStream outPrint = new PrintStream(out, false, 
StandardCharsets.UTF_8.name());
-                    sheetHandler.setOutput(outPrint);
-
-                    try {
-                        parser.parse(sheetSource);
-
-                        sheetInputStream.close();
-
-                        sheetHandler.close();
-                        outPrint.close();
-                    } catch (SAXException se) {
-                        getLogger().error("Error occurred while processing 
Excel sheet {}", new Object[]{readConfig.getSheetName()}, se);
-                    }
-                }
+            ff = session.write(ff, out -> {
+                PrintStream outPrint = new PrintStream(out, false, 
StandardCharsets.UTF_8);
+                sheetHandler.setOutput(outPrint);
+                sheet.forEach(row -> {
+                    sheetHandler.startRow(row.getRowNum());
+                    row.forEach(sheetHandler::cell);
+                    sheetHandler.endRow();
+                });
+                sheetHandler.close();

Review Comment:
   The explicit `outPrint.close()` call is no longer there; the `PrintStream` 
referenced by the variable `outPrint` is closed in the `sheetHandler`'s `close()` method.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@nifi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to