Re: [PR] fix: Restore incremental file name handling [commons-compress]

via GitHub Tue, 26 Aug 2025 23:25:18 -0700


ppkarwasz commented on code in PR #698:
URL: https://github.com/apache/commons-compress/pull/698#discussion_r2302984668



##########
src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java:
##########
@@ -599,4 +607,121 @@ void testWriteNegativeBinary8Byte() {
         assertEquals(-3601L, TarUtils.parseOctalOrBinary(b, 0, 8));
     }
 
+    /**
+     * Builds an NTFS-style path (\\?\C:\...) up to a target total UTF-16 length, respecting 255-unit segments.
+     */
+    private static String createNtfsLongNameByUtf16Units(final int totalUnits) 
{
+        final String prefix = "\\\\?\\C:\\";
+        final String extension = ".txt";
+
+        // U+2605 BLACK STAR (BMP, 1 UTF-16 unit, 3 UTF-8 bytes) => lets us pack 255 units per segment easily
+        final String segment = StringUtils.repeat("★", 255);
+        assertEquals(255, segment.length(), "Segment length should be 255 
UTF-16 code units");
+
+        final StringBuilder sb = new StringBuilder(prefix);
+        while (sb.length() + extension.length() < totalUnits) {
+            sb.append(segment).append('\\');
+        }
+
+        // Trim to exact totalUnits (UTF-16 units), then append extension
+        sb.setLength(totalUnits - extension.length());
+        sb.append(extension);
+        assertEquals(totalUnits, sb.length(), "Final length should be " + 
totalUnits + " UTF-16 code units");
+        return sb.toString();
+    }
+
+    /**
+     * Builds a POSIX-style path (rooted at `/`) up to a target total *byte* length in UTF-8, 255 bytes/segment.
+     */
+    private static String createPosixLongNameByUtf8Bytes(final int totalBytes) 
{
+        final String extension = ".txt";
+        // U+2605 BLACK STAR (BMP, 1 UTF-16 unit, 3 UTF-8 bytes) => 85 * 3 UTF-8 bytes = 255 bytes
+        final String segment = StringUtils.repeat("★", 85);
+        assertEquals(255, utf8Len(segment), "Segment length should be 255 
bytes in UTF-8");
+
+        final StringBuilder sb = new StringBuilder();
+        int count = totalBytes / 256; // how many full 256-byte chunks can we 
fit?
+        while (count-- > 0) {
+            sb.append(segment).append('/');
+        }
+        count = totalBytes - utf8Len(sb) - utf8Len(extension);
+        while (count-- > 0) {
+            sb.append('a');
+        }
+        sb.append(extension);
+        assertEquals(totalBytes, utf8Len(sb), "Final length should be " + 
totalBytes + " bytes in UTF-8");
+        return sb.toString();
+    }
+
+    private static int utf8Len(final CharSequence s) {
+        return s.toString().getBytes(UTF_8).length;
+    }
+
+    private static byte[] utf8Bytes(final String s) {
+        return s.getBytes(UTF_8);
+    }
+
+    private static byte[] paddedUtf8Bytes(final String s) {
+        final int blockSize = 1024;
+        final byte[] bytes = s.getBytes(UTF_8);
+        return Arrays.copyOf(bytes, ((bytes.length + blockSize - 1) / 
blockSize) * blockSize);
+    }
+
+    static Stream<Arguments> readLongNameHandlesLimits() {
+        final String empty = "";
+        final String ntfsLongName = createNtfsLongNameByUtf16Units(32767);
+        final String posixLongName = createPosixLongNameByUtf8Bytes(4095);
+        return Stream.of(
+                Arguments.of("Empty", empty, utf8Bytes(empty)),
+                Arguments.of("Empty (padded)", empty, paddedUtf8Bytes(empty)),
+                Arguments.of("NTFS", ntfsLongName, utf8Bytes(ntfsLongName)),
+                Arguments.of("NTFS (padded)", ntfsLongName, 
paddedUtf8Bytes(ntfsLongName)),
+                Arguments.of("POSIX", posixLongName, utf8Bytes(posixLongName)),
+                Arguments.of("POSIX (padded)", posixLongName, 
paddedUtf8Bytes(posixLongName)));
+    }
+
+    @ParameterizedTest(name = "{0} long name is read correctly")
+    @MethodSource
+    void readLongNameHandlesLimits(final String kind, final String 
expectedName, final byte[] data) throws IOException {

Review Comment:
   Fixed in https://github.com/apache/commons-compress/pull/698/commits/379e8d3a08172bf0a709625962276cd9b3b12569



##########
src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java:
##########
@@ -599,4 +607,121 @@ void testWriteNegativeBinary8Byte() {
         assertEquals(-3601L, TarUtils.parseOctalOrBinary(b, 0, 8));
     }
 
+    /**
+     * Builds an NTFS-style path (\\?\C:\...) up to a target total UTF-16 length, respecting 255-unit segments.
+     */
+    private static String createNtfsLongNameByUtf16Units(final int totalUnits) 
{
+        final String prefix = "\\\\?\\C:\\";
+        final String extension = ".txt";
+
+        // U+2605 BLACK STAR (BMP, 1 UTF-16 unit, 3 UTF-8 bytes) => lets us pack 255 units per segment easily
+        final String segment = StringUtils.repeat("★", 255);
+        assertEquals(255, segment.length(), "Segment length should be 255 
UTF-16 code units");
+
+        final StringBuilder sb = new StringBuilder(prefix);
+        while (sb.length() + extension.length() < totalUnits) {
+            sb.append(segment).append('\\');
+        }
+
+        // Trim to exact totalUnits (UTF-16 units), then append extension
+        sb.setLength(totalUnits - extension.length());
+        sb.append(extension);
+        assertEquals(totalUnits, sb.length(), "Final length should be " + 
totalUnits + " UTF-16 code units");
+        return sb.toString();
+    }
+
+    /**
+     * Builds a POSIX-style path (rooted at `/`) up to a target total *byte* length in UTF-8, 255 bytes/segment.
+     */
+    private static String createPosixLongNameByUtf8Bytes(final int totalBytes) 
{
+        final String extension = ".txt";
+        // U+2605 BLACK STAR (BMP, 1 UTF-16 unit, 3 UTF-8 bytes) => 85 * 3 UTF-8 bytes = 255 bytes
+        final String segment = StringUtils.repeat("★", 85);
+        assertEquals(255, utf8Len(segment), "Segment length should be 255 
bytes in UTF-8");
+
+        final StringBuilder sb = new StringBuilder();
+        int count = totalBytes / 256; // how many full 256-byte chunks can we 
fit?
+        while (count-- > 0) {
+            sb.append(segment).append('/');
+        }
+        count = totalBytes - utf8Len(sb) - utf8Len(extension);
+        while (count-- > 0) {
+            sb.append('a');
+        }
+        sb.append(extension);
+        assertEquals(totalBytes, utf8Len(sb), "Final length should be " + 
totalBytes + " bytes in UTF-8");
+        return sb.toString();
+    }
+
+    private static int utf8Len(final CharSequence s) {
+        return s.toString().getBytes(UTF_8).length;
+    }
+
+    private static byte[] utf8Bytes(final String s) {
+        return s.getBytes(UTF_8);
+    }
+
+    private static byte[] paddedUtf8Bytes(final String s) {
+        final int blockSize = 1024;
+        final byte[] bytes = s.getBytes(UTF_8);
+        return Arrays.copyOf(bytes, ((bytes.length + blockSize - 1) / 
blockSize) * blockSize);
+    }
+
+    static Stream<Arguments> readLongNameHandlesLimits() {
+        final String empty = "";
+        final String ntfsLongName = createNtfsLongNameByUtf16Units(32767);
+        final String posixLongName = createPosixLongNameByUtf8Bytes(4095);
+        return Stream.of(
+                Arguments.of("Empty", empty, utf8Bytes(empty)),
+                Arguments.of("Empty (padded)", empty, paddedUtf8Bytes(empty)),
+                Arguments.of("NTFS", ntfsLongName, utf8Bytes(ntfsLongName)),
+                Arguments.of("NTFS (padded)", ntfsLongName, 
paddedUtf8Bytes(ntfsLongName)),
+                Arguments.of("POSIX", posixLongName, utf8Bytes(posixLongName)),
+                Arguments.of("POSIX (padded)", posixLongName, 
paddedUtf8Bytes(posixLongName)));
+    }
+
+    @ParameterizedTest(name = "{0} long name is read correctly")
+    @MethodSource
+    void readLongNameHandlesLimits(final String kind, final String 
expectedName, final byte[] data) throws IOException {
+        final TarArchiveEntry entry = new TarArchiveEntry("test");
+        entry.setSize(data.length);
+        // Let's add trailing "garbage" to ensure we only read what we should
+        final byte[] dataWithGarbage = Arrays.copyOf(data, data.length + 1024);
+        Arrays.fill(dataWithGarbage, data.length, dataWithGarbage.length, 
(byte) 0xFF);
+
+        try (InputStream in = new ByteArrayInputStream(dataWithGarbage)) {
+            final String actualName = TarUtils.readLongName(in, 
ZipEncodingHelper.getZipEncoding(UTF_8), entry);
+            assertEquals(
+                    expectedName,
+                    actualName,
+                    () -> String.format("[%s] The long name read does not 
match the expected value.", kind));
+        }
+    }
+
+    static Stream<Arguments> readLongNameThrowsOnTruncation() {
+        return Stream.of(
+                Arguments.of(Integer.MAX_VALUE, "truncated long name"),
+                Arguments.of(Long.MAX_VALUE, "invalid long name"));
+    }
+
+    @ParameterizedTest(name = "readLongName of {0} bytes throws 
ArchiveException")
+    @MethodSource
+    void readLongNameThrowsOnTruncation(final long size, final CharSequence 
expectedMessage) throws IOException {

Review Comment:
   Fixed in https://github.com/apache/commons-compress/pull/698/commits/379e8d3a08172bf0a709625962276cd9b3b12569



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] fix: Restore incremental file name handling [commons-compress]

Reply via email to