Author: bodewig Date: Mon Nov 7 16:36:51 2011 New Revision: 1198806 URL: http://svn.apache.org/viewvc?rev=1198806&view=rev Log: support uncompressing concatenated .bz2 files. COMPRESS-146. Submitted by Lasse Collin
Modified: commons/proper/compress/trunk/src/changes/changes.xml commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java Modified: commons/proper/compress/trunk/src/changes/changes.xml URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/changes/changes.xml?rev=1198806&r1=1198805&r2=1198806&view=diff ============================================================================== --- commons/proper/compress/trunk/src/changes/changes.xml (original) +++ commons/proper/compress/trunk/src/changes/changes.xml Mon Nov 7 16:36:51 2011 @@ -49,6 +49,10 @@ The <action> type attribute can be add,u <action issue="COMPRESS-156" type="add" date="2011-11-02"> Support for the XZ format has been added. </action> + <action issue="COMPRESS-146" type="update" date="2011-11-07"> + BZip2CompressorInputStream now optionally supports reading of + concatenated .bz2 files. + </action> </release> <release version="1.3" date="2011-11-01" description="Release 1.3 - API compatible to 1.2 but requires Java5 at runtime"> Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java?rev=1198806&r1=1198805&r2=1198806&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java Mon Nov 7 16:36:51 2011 @@ -62,6 +62,7 @@ public class BZip2CompressorInputStream private int nInUse; private InputStream in; + private final boolean decompressConcatenated; private int currentChar = -1; @@ -97,8 +98,9 @@ public class 
BZip2CompressorInputStream private BZip2CompressorInputStream.Data data; /** - * Constructs a new BZip2CompressorInputStream which decompresses bytes read from the - * specified stream. + * Constructs a new BZip2CompressorInputStream which decompresses bytes + * read from the specified stream. This doesn't support decompressing + * concatenated .bz2 files. * * @throws IOException * if the stream content is malformed or an I/O error occurs. @@ -106,10 +108,37 @@ public class BZip2CompressorInputStream * if <tt>in == null</tt> */ public BZip2CompressorInputStream(final InputStream in) throws IOException { + this(in, false); + } + + /** + * Constructs a new BZip2CompressorInputStream which decompresses bytes + * read from the specified stream. + * + * @param in the InputStream from which this object should + * be created + * @param decompressConcatenated + * if true, decompress until the end of the input; + * if false, stop after the first .bz2 stream and + * leave the input position to point to the next + * byte after the .bz2 stream + * + * @throws IOException + * if the stream content is malformed or an I/O error occurs. 
+ * @throws NullPointerException + * if <tt>in == null</tt> + */ + public BZip2CompressorInputStream(final InputStream in, + final boolean decompressConcatenated) + throws IOException { super(); this.in = in; - init(); + this.decompressConcatenated = decompressConcatenated; + + init(true); + initBlock(); + setupBlock(); } /** {@inheritDoc} */ @@ -210,54 +239,71 @@ public class BZip2CompressorInputStream return retChar; } - private void init() throws IOException { + private boolean init(boolean isFirstStream) throws IOException { if (null == in) { throw new IOException("No InputStream"); } - checkMagicChar('B', "first"); - checkMagicChar('Z', "second"); - checkMagicChar('h', "third"); + + int magic0 = this.in.read(); + int magic1 = this.in.read(); + int magic2 = this.in.read(); + if (magic0 == -1 && !isFirstStream) + return false; + + if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') + throw new IOException(isFirstStream + ? "Stream is not in the BZip2 format" + : "Garbage after a valid BZip2 stream"); int blockSize = this.in.read(); if ((blockSize < '1') || (blockSize > '9')) { - throw new IOException("Stream is not BZip2 formatted: illegal " - + "blocksize " + (char) blockSize); + throw new IOException("BZip2 block size is invalid"); } this.blockSize100k = blockSize - '0'; - initBlock(); - setupBlock(); - } + this.bsLive = 0; + this.computedCombinedCRC = 0; - private void checkMagicChar(char expected, String position) - throws IOException { - int magic = this.in.read(); - if (magic != expected) { - throw new IOException("Stream is not BZip2 formatted: expected '" - + expected + "' as " + position + " byte but got '" - + (char) magic + "'"); - } + return true; } private void initBlock() throws IOException { - char magic0 = bsGetUByte(); - char magic1 = bsGetUByte(); - char magic2 = bsGetUByte(); - char magic3 = bsGetUByte(); - char magic4 = bsGetUByte(); - char magic5 = bsGetUByte(); - - if (magic0 == 0x17 && magic1 == 0x72 && magic2 == 0x45 - && magic3 == 0x38 
&& magic4 == 0x50 && magic5 == 0x90) { - complete(); // end of file - } else if (magic0 != 0x31 || // '1' - magic1 != 0x41 || // ')' - magic2 != 0x59 || // 'Y' - magic3 != 0x26 || // '&' - magic4 != 0x53 || // 'S' - magic5 != 0x59 // 'Y' - ) { + char magic0; + char magic1; + char magic2; + char magic3; + char magic4; + char magic5; + + while (true) { + // Get the block magic bytes. + magic0 = bsGetUByte(); + magic1 = bsGetUByte(); + magic2 = bsGetUByte(); + magic3 = bsGetUByte(); + magic4 = bsGetUByte(); + magic5 = bsGetUByte(); + + // If this isn't the end-of-stream magic, break out of the loop. + if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45 + || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) + break; + + // End of stream was reached. Check the combined CRC and + // advance to the next .bz2 stream if decoding concatenated + // streams. + if (complete()) + return; + } + + if (magic0 != 0x31 || // '1' + magic1 != 0x41 || // 'A' + magic2 != 0x59 || // 'Y' + magic3 != 0x26 || // '&' + magic4 != 0x53 || // 'S' + magic5 != 0x59 // 'Y' + ) { this.currentState = EOF; throw new IOException("bad block header"); } else { @@ -299,7 +345,7 @@ public class BZip2CompressorInputStream this.computedCombinedCRC ^= this.computedBlockCRC; } - private void complete() throws IOException { + private boolean complete() throws IOException { this.storedCombinedCRC = bsGetInt(); this.currentState = EOF; this.data = null; @@ -307,6 +353,10 @@ public class BZip2CompressorInputStream if (this.storedCombinedCRC != this.computedCombinedCRC) { throw new IOException("BZip2 CRC error"); } + + // Look for the next .bz2 stream if decompressing + // concatenated files. + return !decompressConcatenated || !init(false); } @Override