umarhussain15 commented on code in PR #8350:
URL: https://github.com/apache/nifi/pull/8350#discussion_r1477060364


##########
nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestUnpackContent.java:
##########
@@ -222,6 +225,88 @@ public void testInvalidZip() throws IOException {
             flowFile.assertContentEquals(path.toFile());
         }
     }
+    @Test
+    public void testZipEncodingField() {
+        final TestRunner unpackRunner = TestRunners.newTestRunner(new 
UnpackContent());
+        unpackRunner.setProperty(UnpackContent.PACKAGING_FORMAT, 
UnpackContent.PackageFormat.ZIP_FORMAT.toString());
+        unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, 
"invalid-encoding");
+        unpackRunner.assertNotValid();
+        unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, "IBM437");
+        unpackRunner.assertValid();
+        unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, "Cp437");
+        unpackRunner.assertValid();
+        unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, 
StandardCharsets.ISO_8859_1.name());
+        unpackRunner.assertValid();
+        unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, 
StandardCharsets.UTF_8.name());
+        unpackRunner.assertValid();
+
+    }
+    @Test
+    public void testZipWithCp437Encoding() throws IOException {
+        String zipFilename = "windows-with-cp437.zip";
+        final TestRunner unpackRunner = TestRunners.newTestRunner(new 
UnpackContent());
+        final TestRunner autoUnpackRunner = TestRunners.newTestRunner(new 
UnpackContent());
+        unpackRunner.setProperty(UnpackContent.PACKAGING_FORMAT, 
UnpackContent.PackageFormat.ZIP_FORMAT.toString());
+        unpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, "Cp437");
+        
unpackRunner.setProperty(UnpackContent.ALLOW_STORED_ENTRIES_WITH_DATA_DESCRIPTOR,
 "true"); // just forces this to be exercised
+
+        autoUnpackRunner.setProperty(UnpackContent.PACKAGING_FORMAT, 
UnpackContent.PackageFormat.AUTO_DETECT_FORMAT.toString());
+        autoUnpackRunner.setProperty(UnpackContent.ZIP_FILENAME_CHARSET, 
"Cp437");
+
+        unpackRunner.enqueue(dataPath.resolve(zipFilename));
+        unpackRunner.enqueue(dataPath.resolve(zipFilename));
+
+        Map<String, String> attributes = new HashMap<>(1);
+        attributes.put("mime.type", "application/zip");
+        autoUnpackRunner.enqueue(dataPath.resolve(zipFilename), attributes);
+        autoUnpackRunner.enqueue(dataPath.resolve(zipFilename), attributes);
+        unpackRunner.run(2);
+        autoUnpackRunner.run(2);
+
+        unpackRunner.assertTransferCount(UnpackContent.REL_FAILURE, 0);
+        autoUnpackRunner.assertTransferCount(UnpackContent.REL_FAILURE, 0);
+
+        final List<MockFlowFile> unpacked =
+            
unpackRunner.getFlowFilesForRelationship(UnpackContent.REL_SUCCESS);
+        for (final MockFlowFile flowFile : unpacked) {
+            final String filename = 
flowFile.getAttribute(CoreAttributes.FILENAME.key());
+            assertTrue(StringUtils.containsNone(filename, "?"), "filename 
contains '?': " + filename);
+            assertTrue(StringUtils.containsNone(filename, "�"), "filename 
contains '�': " + filename);
+            final String path = 
flowFile.getAttribute(CoreAttributes.PATH.key());
+            assertTrue(StringUtils.containsNone(path, "?"), "path contains 
'?': " + path);
+            assertTrue(StringUtils.containsNone(path, "�"), "path contains 
'�': " + path);

Review Comment:
   Thanks. I have updated the test cases to generate a zip with a known Unicode 
character and to look for it in the output filename.
   The test case with the Windows zip file now only looks for `?`, as 
the input contains both files with special characters in their names and files 
without any special characters in their names. Also, UnpackContent always 
outputs `?` for this zip file (the original issue reported in the ticket).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to