Author: bodewig Date: Sat Aug 10 16:22:49 2013 New Revision: 1512789 URL: http://svn.apache.org/r1512789 Log: Add encoding support to DumpArchiveInputStream - related to COMPRESS-180
Modified: commons/proper/compress/trunk/src/changes/changes.xml commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java Modified: commons/proper/compress/trunk/src/changes/changes.xml URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/changes/changes.xml?rev=1512789&r1=1512788&r2=1512789&view=diff ============================================================================== --- commons/proper/compress/trunk/src/changes/changes.xml (original) +++ commons/proper/compress/trunk/src/changes/changes.xml Sat Aug 10 16:22:49 2013 @@ -95,6 +95,10 @@ The <action> type attribute can be add,u TarArchiveOutputStream now properly handles link names that are too long to fit into a traditional TAR header. </action> + <action type="add" date="2013-08-10"> + DumpArchiveInputStream now supports an encoding parameter that + can be used to specify the default encoding of file names. + </action> </release> <release version="1.5" date="2013-03-14" description="Release 1.5"> Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java?rev=1512789&r1=1512788&r2=1512789&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java Sat Aug 10 16:22:49 2013 @@ -116,7 +116,7 @@ public class ArchiveStreamFactory { private String entryEncoding = null; /** - * Returns the encoding to use for arj, zip and tar files, + * Returns the encoding to use for arj, zip, dump and tar files, * or null for the default. * * @return entry encoding, or null @@ -127,8 +127,8 @@ public class ArchiveStreamFactory { } /** - * Sets the encoding to use for arj, zip and tar files. - * Use null for the default. + * Sets the encoding to use for arj, zip, dump and tar files. Use + * null for the default. * * @since 1.5 */ @@ -188,7 +188,11 @@ public class ArchiveStreamFactory { return new CpioArchiveInputStream(in); } if (DUMP.equalsIgnoreCase(archiverName)) { - return new DumpArchiveInputStream(in); + if (entryEncoding != null) { + return new DumpArchiveInputStream(in, entryEncoding); + } else { + return new DumpArchiveInputStream(in); + } } throw new ArchiveException("Archiver: " + archiverName + " not found."); Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java?rev=1512789&r1=1512788&r2=1512789&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java Sat Aug 10 16:22:49 2013 @@ -20,6 +20,8 @@ package org.apache.commons.compress.arch import org.apache.commons.compress.archivers.ArchiveException; import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipEncoding; +import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; import java.io.EOFException; import java.io.IOException; @@ -39,6 +41,11 @@ import java.util.Stack; * the archive, and the read each entry as a normal input stream * using read(). * + * There doesn't seem to exist a hint on the encoding of string values + * in any piece documentation. Given the main purpose of dump/restore + * is backing up a system it seems very likely the format uses the + * current default encoding of the system. + * * @NotThreadSafe */ public class DumpArchiveInputStream extends ArchiveInputStream { @@ -65,14 +72,34 @@ public class DumpArchiveInputStream exte private Queue<DumpArchiveEntry> queue; /** - * Constructor. + * The encoding to use for filenames and labels. + */ + private final ZipEncoding encoding; + + /** + * Constructor using the platform's default encoding for file + * names. * * @param is * @throws ArchiveException */ public DumpArchiveInputStream(InputStream is) throws ArchiveException { + this(is, null); + } + + /** + * Constructor. + * + * @param is + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @since 1.6 + */ + public DumpArchiveInputStream(InputStream is, String encoding) + throws ArchiveException { this.raw = new TapeInputStream(is); this.hasHitEOF = false; + this.encoding = ZipEncodingHelper.getZipEncoding(encoding); try { // read header, verify it's a dump archive. @@ -83,7 +110,7 @@ public class DumpArchiveInputStream exte } // get summary information - summary = new DumpArchiveSummary(headerBytes); + summary = new DumpArchiveSummary(headerBytes, this.encoding); // reset buffer with actual block size. raw.resetBlockSize(summary.getNTRec(), summary.isCompressed()); @@ -324,7 +351,7 @@ public class DumpArchiveInputStream exte byte type = blockBuffer[i + 6]; - String name = new String(blockBuffer, i + 8, blockBuffer[i + 7]); // TODO default charset? + String name = DumpArchiveUtil.decode(encoding, blockBuffer, i + 8, blockBuffer[i + 7]); if (".".equals(name) || "..".equals(name)) { // do nothing... Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java?rev=1512789&r1=1512788&r2=1512789&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java Sat Aug 10 16:22:49 2013 @@ -18,8 +18,10 @@ */ package org.apache.commons.compress.archivers.dump; +import java.io.IOException; import java.util.Date; +import org.apache.commons.compress.archivers.zip.ZipEncoding; /** * This class represents identifying information about a Dump archive volume. @@ -41,15 +43,15 @@ public class DumpArchiveSummary { private int firstrec; private int ntrec; - DumpArchiveSummary(byte[] buffer) { + DumpArchiveSummary(byte[] buffer, ZipEncoding encoding) throws IOException { dumpDate = 1000L * DumpArchiveUtil.convert32(buffer, 4); previousDumpDate = 1000L * DumpArchiveUtil.convert32(buffer, 8); volume = DumpArchiveUtil.convert32(buffer, 12); - label = new String(buffer, 676, DumpArchiveConstants.LBLSIZE).trim(); // TODO default charset? + label = DumpArchiveUtil.decode(encoding, buffer, 676, DumpArchiveConstants.LBLSIZE).trim(); level = DumpArchiveUtil.convert32(buffer, 692); - filesys = new String(buffer, 696, DumpArchiveConstants.NAMELEN).trim(); // TODO default charset? - devname = new String(buffer, 760, DumpArchiveConstants.NAMELEN).trim(); // TODO default charset? - hostname = new String(buffer, 824, DumpArchiveConstants.NAMELEN).trim(); // TODO default charset? + filesys = DumpArchiveUtil.decode(encoding, buffer, 696, DumpArchiveConstants.NAMELEN).trim(); + devname = DumpArchiveUtil.decode(encoding, buffer, 760, DumpArchiveConstants.NAMELEN).trim(); + hostname = DumpArchiveUtil.decode(encoding, buffer, 824, DumpArchiveConstants.NAMELEN).trim(); flags = DumpArchiveUtil.convert32(buffer, 888); firstrec = DumpArchiveUtil.convert32(buffer, 892); ntrec = DumpArchiveUtil.convert32(buffer, 896); Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java?rev=1512789&r1=1512788&r2=1512789&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java Sat Aug 10 16:22:49 2013 @@ -18,6 +18,8 @@ */ package org.apache.commons.compress.archivers.dump; +import java.io.IOException; +import org.apache.commons.compress.archivers.zip.ZipEncoding; /** * Various utilities for dump archives. @@ -130,4 +132,14 @@ class DumpArchiveUtil { return i; } + + /** + * Decodes a byte array to a string. + */ + static String decode(ZipEncoding encoding, byte[] b, int offset, int len) + throws IOException { + byte[] copy = new byte[len]; + System.arraycopy(b, offset, copy, 0, len); + return encoding.decode(copy); + } } Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java?rev=1512789&r1=1512788&r2=1512789&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java Sat Aug 10 16:22:49 2013 @@ -30,6 +30,7 @@ import java.util.zip.CRC32; import org.apache.commons.compress.utils.BoundedInputStream; import org.apache.commons.compress.utils.CRC32VerifyingInputStream; +import org.apache.commons.compress.utils.CharsetNames; /** * Reads a 7z file, using RandomAccessFile under @@ -670,7 +671,7 @@ public class SevenZFile { int nextName = 0; for (int i = 0; i < names.length; i += 2) { if (names[i] == 0 && names[i+1] == 0) { - files[nextFile++].setName(new String(names, nextName, i-nextName, "UTF-16LE")); + files[nextFile++].setName(new String(names, nextName, i-nextName, CharsetNames.UTF_16LE)); nextName = i + 2; } } Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java?rev=1512789&r1=1512788&r2=1512789&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java Sat Aug 10 16:22:49 2013 @@ -239,7 +239,15 @@ public class TarUtils { // Helper method to generate the exception message private static String exceptionMessage(byte[] buffer, final int offset, final int length, int current, final byte currentByte) { - String string = new String(buffer, offset, length); // TODO default charset? + // default charset is good enough for an exception message, + // + // the alternative was to modify parseOctal and + // parseOctalOrBinary to receive the ZipEncoding of the + // archive (deprecating the existing public methods, of + // course) and dealing with the fact that ZipEncoding#decode + // can throw an IOException which parseOctal* doesn't declare + String string = new String(buffer, offset, length); + string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed final String s = "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length; return s;