This is an automated email from the ASF dual-hosted git repository. bodewig pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/ant.git
The following commit(s) were added to refs/heads/master by this push: new e04fbe7ff try to properly decode multibyte encoded names in tar entries e04fbe7ff is described below commit e04fbe7ffa4f549bdc00bdfce688fb587120eedd Author: Stefan Bodewig <stefan.bode...@innoq.com> AuthorDate: Sat Nov 26 18:47:07 2022 +0100 try to properly decode multibyte encoded names in tar entries --- src/main/org/apache/tools/tar/TarUtils.java | 71 +++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 5 deletions(-) diff --git a/src/main/org/apache/tools/tar/TarUtils.java b/src/main/org/apache/tools/tar/TarUtils.java index f6f103654..ffa24b1cb 100644 --- a/src/main/org/apache/tools/tar/TarUtils.java +++ b/src/main/org/apache/tools/tar/TarUtils.java @@ -26,6 +26,8 @@ package org.apache.tools.tar; import java.io.IOException; import java.math.BigInteger; import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.WeakHashMap; import org.apache.tools.zip.ZipEncoding; import org.apache.tools.zip.ZipEncodingHelper; @@ -38,6 +40,10 @@ import org.apache.tools.zip.ZipEncodingHelper; public class TarUtils { private static final int BYTE_MASK = 255; + private static final String NUL = "\0"; + private static final String X = "X"; + private static final String X_NUL = "X\0"; + private static final WeakHashMap<ZipEncoding, byte[]> NUL_BY_ENCODING = new WeakHashMap<>(); static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(null); @@ -286,20 +292,75 @@ public class TarUtils { final ZipEncoding encoding) throws IOException { + final byte[] nul = getNulByteEquivalent(encoding); + final int nulLen = nul.length; int len = 0; - for (; len < length; ++len) { - if (buffer[offset + len] == 0) { - break; + if (nulLen == 1) { + final byte nulByte = nul[0]; + for (; len < length; ++len) { + if (buffer[offset + len] == nulByte) { + break; + } + } + } else if (nulLen == 0) { + len = length; + } else { + boolean found = false; + for (; len <= length - nulLen; ++len) { + // six-arg Arrays.equals requires Java 9 + byte[] atOffset = Arrays.copyOfRange(buffer, offset + len, offset + len + nulLen); + if (Arrays.equals(atOffset, nul)) { + found = true; + break; + } + } + if (!found) { + len = length; } } if (len > 0) { - final byte[] b = new byte[len]; - System.arraycopy(buffer, offset, b, 0, len); + final byte[] b = Arrays.copyOfRange(buffer, offset, offset + len); return encoding.decode(b); } return ""; } + private static byte[] getNulByteEquivalent(ZipEncoding encoding) throws IOException { + byte[] value = NUL_BY_ENCODING.get(encoding); + if (value == null) { + value = getUncachedNulByteEquivalent(encoding); + NUL_BY_ENCODING.put(encoding, value); + } + return value; + } + + private static byte[] getUncachedNulByteEquivalent(ZipEncoding encoding) throws IOException { + final ByteBuffer nulBuffer = encoding.encode(NUL); + final int nulLen = nulBuffer.limit() - nulBuffer.position(); + final byte[] nul = + Arrays.copyOfRange(nulBuffer.array(), nulBuffer.arrayOffset(), nulBuffer.arrayOffset() + nulLen); + if (nulLen <= 1) { + return nul; + } + final ByteBuffer xBuffer = encoding.encode(X); + final int xBufferLen = xBuffer.limit() - xBuffer.position(); + final ByteBuffer xNulBuffer = encoding.encode(X_NUL); + final int xNulBufferLen = xNulBuffer.limit() - xNulBuffer.position(); + if (xBufferLen >= xNulBufferLen) { + return nul; + } + final byte[] x = + Arrays.copyOfRange(xBuffer.array(), xBuffer.arrayOffset(), xBuffer.arrayOffset() + xBufferLen); + final byte[] nulPrefix = + Arrays.copyOfRange(xNulBuffer.array(), xNulBuffer.arrayOffset(), xNulBuffer.arrayOffset() + xBufferLen); + if (Arrays.equals(x, nulPrefix)) { + // strip of BOM or similar prefix + return Arrays.copyOfRange(xNulBuffer.array(), xNulBuffer.arrayOffset() + xBufferLen, + xNulBuffer.arrayOffset() + xNulBufferLen); + } + return nul; + } + /** * Copy a name into a buffer. * Copies characters from the name into the buffer