Repository: commons-compress Updated Branches: refs/heads/master b19bf2b7e -> 092bcac5b
COMPRESS-390: Expose stream offsets and size Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/3bf400ed Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/3bf400ed Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/3bf400ed Branch: refs/heads/master Commit: 3bf400edc1add6078a9bd935e3ae684b8bf8493f Parents: b19bf2b Author: Zbynek Vyskovsky <kvr...@gmail.com> Authored: Sun Apr 30 22:03:07 2017 -0700 Committer: Stefan Bodewig <bode...@apache.org> Committed: Thu May 4 16:47:17 2017 +0200 ---------------------------------------------------------------------- .../compress/archivers/EntryStreamOffsets.java | 46 +++++++++++++ .../compress/archivers/tar/TarArchiveEntry.java | 2 +- .../compress/archivers/zip/ZipArchiveEntry.java | 42 +++++++++++- .../archivers/zip/ZipArchiveInputStream.java | 7 +- .../commons/compress/archivers/zip/ZipFile.java | 55 ++++++---------- .../zip/ZipArchiveInputStreamTest.java | 24 +++++++ .../compress/archivers/zip/ZipFileTest.java | 68 ++++++++++++++++++++ 7 files changed, 205 insertions(+), 39 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-compress/blob/3bf400ed/src/main/java/org/apache/commons/compress/archivers/EntryStreamOffsets.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/archivers/EntryStreamOffsets.java b/src/main/java/org/apache/commons/compress/archivers/EntryStreamOffsets.java new file mode 100644 index 0000000..a73d079 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/EntryStreamOffsets.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers; + + +/** + * Provides information about ArchiveEntry stream offsets. + */ +public interface EntryStreamOffsets { + + /** Special value indicating that the offset is unknown. */ + long OFFSET_UNKNOWN = -1; + + /** + * Gets the offset of the data stream within the archive file. + * + * @return + * the offset of entry data stream, {@code OFFSET_UNKNOWN} if not known. + */ + long getDataOffset(); + + /** + * Indicates whether the stream is contiguous, i.e. not split among + * several archive parts, interspersed with control blocks, etc. + * + * @return + * true if stream is contiguous, false otherwise. 
+ */ + boolean isStreamContiguous(); +} http://git-wip-us.apache.org/repos/asf/commons-compress/blob/3bf400ed/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java index 8fed4a1..1d572d0 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java @@ -142,7 +142,7 @@ import org.apache.commons.compress.utils.ArchiveUtils; * @NotThreadSafe */ -public class TarArchiveEntry implements TarConstants, ArchiveEntry { +public class TarArchiveEntry implements ArchiveEntry, TarConstants { private static final TarArchiveEntry[] EMPTY_TAR_ARCHIVE_ENTRIES = new TarArchiveEntry[0]; /** The entry's name. */ http://git-wip-us.apache.org/repos/asf/commons-compress/blob/3bf400ed/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java index 2abd22b..f769cf5 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java @@ -18,6 +18,7 @@ package org.apache.commons.compress.archivers.zip; import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.EntryStreamOffsets; import java.io.File; import java.util.ArrayList; @@ -48,7 +49,8 @@ import java.util.zip.ZipException; * @NotThreadSafe */ public class ZipArchiveEntry extends java.util.zip.ZipEntry - implements ArchiveEntry { + implements ArchiveEntry, 
EntryStreamOffsets +{ public static final int PLATFORM_UNIX = 3; public static final int PLATFORM_FAT = 0; @@ -89,6 +91,10 @@ public class ZipArchiveEntry extends java.util.zip.ZipEntry private byte[] rawName = null; private GeneralPurposeBit gpb = new GeneralPurposeBit(); private static final ZipExtraField[] noExtraFields = new ZipExtraField[0]; + private long localHeaderOffset = OFFSET_UNKNOWN; + private long dataOffset = OFFSET_UNKNOWN; + private boolean isStreamContiguous = false; + /** * Creates a new zip entry with the specified name. @@ -678,6 +684,38 @@ public class ZipArchiveEntry extends java.util.zip.ZipEntry return null; } + protected long getLocalHeaderOffset() { + return this.localHeaderOffset; + } + + protected void setLocalHeaderOffset(long localHeaderOffset) { + this.localHeaderOffset = localHeaderOffset; + } + + @Override + public long getDataOffset() { + return dataOffset; + } + + /** + * Sets the data offset. + * + * @param dataOffset + * new value of data offset. + */ + protected void setDataOffset(long dataOffset) { + this.dataOffset = dataOffset; + } + + @Override + public boolean isStreamContiguous() { + return isStreamContiguous; + } + + protected void setStreamContiguous(boolean isStreamContiguous) { + this.isStreamContiguous = isStreamContiguous; + } + /** * Get the hashCode of the entry. * This uses the name as the hashcode. 
@@ -801,6 +839,8 @@ public class ZipArchiveEntry extends java.util.zip.ZipEntry other.getCentralDirectoryExtra()) && Arrays.equals(getLocalFileDataExtra(), other.getLocalFileDataExtra()) + && localHeaderOffset == other.localHeaderOffset + && dataOffset == other.dataOffset && gpb.equals(other.gpb); } http://git-wip-us.apache.org/repos/asf/commons-compress/blob/3bf400ed/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java index 3783576..7fda989 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java @@ -226,6 +226,7 @@ public class ZipArchiveInputStream extends ArchiveInputStream { firstEntry = false; } + long currentHeaderOffset = getBytesRead(); try { if (firstEntry) { // split archives have a special signature before the @@ -307,6 +308,10 @@ public class ZipArchiveInputStream extends ArchiveInputStream { processZip64Extra(size, cSize); + current.entry.setLocalHeaderOffset(currentHeaderOffset); + current.entry.setDataOffset(getBytesRead()); + current.entry.setStreamContiguous(true); + if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) { if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) { current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize())); @@ -319,7 +324,7 @@ public class ZipArchiveInputStream extends ArchiveInputStream { current.in = new BZip2CompressorInputStream(new BoundedInputStream(in, current.entry.getCompressedSize())); } } - + entriesRead++; return current.entry; } 
http://git-wip-us.apache.org/repos/asf/commons-compress/blob/3bf400ed/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java index 6b2c22a..486d5e7 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java @@ -103,11 +103,6 @@ public class ZipFile implements Closeable { private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = new HashMap<>(HASH_SIZE); - private static final class OffsetEntry { - private long headerOffset = -1; - private long dataOffset = -1; - } - /** * The encoding to use for filenames and the file comment. * @@ -440,8 +435,7 @@ public class ZipFile implements Closeable { if (!(ze instanceof Entry)) { return null; } - final OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry(); - final long start = offsetEntry.dataOffset; + final long start = ze.getDataOffset(); return createBoundedInputStream(start, ze.getCompressedSize()); } @@ -480,9 +474,8 @@ public class ZipFile implements Closeable { return null; } // cast valididty is checked just above - final OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry(); ZipUtil.checkRequestedFeatures(ze); - final long start = offsetEntry.dataOffset; + final long start = ze.getDataOffset(); // doesn't get closed if the method is not supported, but doesn't hold any resources either final BoundedInputStream bis = createBoundedInputStream(start, ze.getCompressedSize()); //NOSONAR @@ -645,8 +638,7 @@ public class ZipFile implements Closeable { cfhBbuf.rewind(); IOUtils.readFully(archive, cfhBbuf); int off = 0; - final OffsetEntry offset = new OffsetEntry(); - final Entry ze = new Entry(offset); + final Entry ze = new Entry(); final int versionMadeBy = 
ZipShort.getValue(cfhBuf, off); off += SHORT; @@ -705,7 +697,7 @@ public class ZipFile implements Closeable { ze.setName(entryEncoding.decode(fileName), fileName); // LFH offset, - offset.headerOffset = ZipLong.getValue(cfhBuf, off); + ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off)); // data offset will be filled later entries.add(ze); @@ -713,7 +705,7 @@ public class ZipFile implements Closeable { IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData)); ze.setCentralDirectoryExtra(cdExtraData); - setSizesAndOffsetFromZip64Extra(ze, offset, diskStart); + setSizesAndOffsetFromZip64Extra(ze, diskStart); final byte[] comment = new byte[commentLen]; IOUtils.readFully(archive, ByteBuffer.wrap(comment)); @@ -737,7 +729,6 @@ public class ZipFile implements Closeable { * size would be invalid.</p> */ private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze, - final OffsetEntry offset, final int diskStart) throws IOException { final Zip64ExtendedInformationExtraField z64 = @@ -747,7 +738,7 @@ public class ZipFile implements Closeable { final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC; final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC; final boolean hasRelativeHeaderOffset = - offset.headerOffset == ZIP64_MAGIC; + ze.getLocalHeaderOffset() == ZIP64_MAGIC; z64.reparseCentralDirectoryData(hasUncompressedSize, hasCompressedSize, hasRelativeHeaderOffset, @@ -766,8 +757,7 @@ public class ZipFile implements Closeable { } if (hasRelativeHeaderOffset) { - offset.headerOffset = - z64.getRelativeHeaderOffset().getLongValue(); + ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue()); } } } @@ -1037,8 +1027,7 @@ public class ZipFile implements Closeable { // entries is filled in populateFromCentralDirectory and // never modified final Entry ze = (Entry) zipArchiveEntry; - final OffsetEntry offsetEntry = ze.getOffsetEntry(); - final long offset = offsetEntry.headerOffset; + final long offset = 
ze.getLocalHeaderOffset(); archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); wordBbuf.rewind(); IOUtils.readFully(archive, wordBbuf); @@ -1051,8 +1040,9 @@ public class ZipFile implements Closeable { final byte[] localExtraData = new byte[extraFieldLen]; IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData)); ze.setExtra(localExtraData); - offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH - + SHORT + SHORT + fileNameLen + extraFieldLen; + ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH + + SHORT + SHORT + fileNameLen + extraFieldLen); + ze.setStreamContiguous(true); if (entriesWithoutUTF8Flag.containsKey(ze)) { final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); @@ -1232,8 +1222,8 @@ public class ZipFile implements Closeable { if (ent2 == null) { return -1; } - final long val = (ent1.getOffsetEntry().headerOffset - - ent2.getOffsetEntry().headerOffset); + final long val = (ent1.getLocalHeaderOffset() + - ent2.getLocalHeaderOffset()); return val == 0 ? 0 : val < 0 ? 
-1 : +1; } }; @@ -1243,20 +1233,13 @@ public class ZipFile implements Closeable { */ private static class Entry extends ZipArchiveEntry { - private final OffsetEntry offsetEntry; - - Entry(final OffsetEntry offset) { - this.offsetEntry = offset; - } - - OffsetEntry getOffsetEntry() { - return offsetEntry; + Entry() { } @Override public int hashCode() { return 3 * super.hashCode() - + (int) (offsetEntry.headerOffset % Integer.MAX_VALUE); + + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32); } @Override @@ -1264,10 +1247,10 @@ public class ZipFile implements Closeable { if (super.equals(other)) { // super.equals would return false if other were not an Entry final Entry otherEntry = (Entry) other; - return offsetEntry.headerOffset - == otherEntry.offsetEntry.headerOffset - && offsetEntry.dataOffset - == otherEntry.offsetEntry.dataOffset; + return getLocalHeaderOffset() + == otherEntry.getLocalHeaderOffset() + && getDataOffset() + == otherEntry.getDataOffset(); } return false; } http://git-wip-us.apache.org/repos/asf/commons-compress/blob/3bf400ed/src/test/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStreamTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStreamTest.java index ea087c4..cd7efdc 100644 --- a/src/test/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStreamTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStreamTest.java @@ -26,13 +26,16 @@ import static org.junit.Assert.fail; import java.io.BufferedInputStream; import java.io.EOFException; +import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.util.Arrays; import java.util.zip.ZipException; +import org.apache.commons.compress.archivers.ArchiveEntry; import 
org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; import org.junit.Test; public class ZipArchiveInputStreamTest { @@ -248,6 +251,27 @@ public class ZipArchiveInputStreamTest { } } + /** + * Test correct population of header and data offsets. + */ + @Test + public void testOffsets() throws Exception { + // mixed.zip contains both inflated and stored files + try (InputStream archiveStream = ZipArchiveInputStream.class.getResourceAsStream("/mixed.zip"); + ZipArchiveInputStream zipStream = new ZipArchiveInputStream((archiveStream)) + ) { + ZipArchiveEntry inflatedEntry = zipStream.getNextZipEntry(); + Assert.assertEquals("inflated.txt", inflatedEntry.getName()); + Assert.assertEquals(0x0000, inflatedEntry.getLocalHeaderOffset()); + Assert.assertEquals(0x0046, inflatedEntry.getDataOffset()); + ZipArchiveEntry storedEntry = zipStream.getNextZipEntry(); + Assert.assertEquals("stored.txt", storedEntry.getName()); + Assert.assertEquals(0x5892, storedEntry.getLocalHeaderOffset()); + Assert.assertEquals(0x58d6, storedEntry.getDataOffset()); + Assert.assertNull(zipStream.getNextZipEntry()); + } + } + private static byte[] readEntry(ZipArchiveInputStream zip, ZipArchiveEntry zae) throws IOException { final int len = (int)zae.getSize(); final byte[] buff = new byte[len]; http://git-wip-us.apache.org/repos/asf/commons-compress/blob/3bf400ed/src/test/java/org/apache/commons/compress/archivers/zip/ZipFileTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipFileTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipFileTest.java index 3e83675..8455bd7 100644 --- a/src/test/java/org/apache/commons/compress/archivers/zip/ZipFileTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipFileTest.java @@ -21,6 +21,7 @@ package org.apache.commons.compress.archivers.zip; import static org.apache.commons.compress.AbstractTestCase.getFile; 
import static org.junit.Assert.*; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -35,11 +36,13 @@ import java.util.HashMap; import java.util.Map; import java.util.TreeMap; import java.util.concurrent.atomic.AtomicInteger; +import java.util.zip.CRC32; import java.util.zip.ZipEntry; import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; import org.junit.After; +import org.junit.Assert; import org.junit.Test; public class ZipFileTest { @@ -398,6 +401,65 @@ public class ZipFileTest { assertEquals(2, passedCount.get()); } + /** + * Test correct population of header and data offsets. + */ + @Test + public void testOffsets() throws Exception { + // mixed.zip contains both inflated and stored files + final File archive = getFile("mixed.zip"); + try (ZipFile zf = new ZipFile(archive)) { + ZipArchiveEntry inflatedEntry = zf.getEntry("inflated.txt"); + Assert.assertEquals(0x0000, inflatedEntry.getLocalHeaderOffset()); + Assert.assertEquals(0x0046, inflatedEntry.getDataOffset()); + Assert.assertTrue(inflatedEntry.isStreamContiguous()); + ZipArchiveEntry storedEntry = zf.getEntry("stored.txt"); + Assert.assertEquals(0x5892, storedEntry.getLocalHeaderOffset()); + Assert.assertEquals(0x58d6, storedEntry.getDataOffset()); + Assert.assertTrue(inflatedEntry.isStreamContiguous()); + } + } + + /** + * Test correct population of header and data offsets when they are written after stream. 
+ */ + @Test + public void testDelayedOffsetsAndSizes() throws Exception { + ByteArrayOutputStream zipContent = new ByteArrayOutputStream(); + try (ZipArchiveOutputStream zipOutput = new ZipArchiveOutputStream(zipContent)) { + ZipArchiveEntry inflatedEntry = new ZipArchiveEntry("inflated.txt"); + inflatedEntry.setMethod(ZipEntry.DEFLATED); + zipOutput.putArchiveEntry(inflatedEntry); + zipOutput.write("Hello Deflated\n".getBytes()); + zipOutput.closeArchiveEntry(); + + byte[] storedContent = "Hello Stored\n".getBytes(); + ZipArchiveEntry storedEntry = new ZipArchiveEntry("stored.txt"); + storedEntry.setMethod(ZipEntry.STORED); + storedEntry.setSize(storedContent.length); + storedEntry.setCrc(calculateCrc32(storedContent)); + zipOutput.putArchiveEntry(storedEntry); + zipOutput.write("Hello Stored\n".getBytes()); + zipOutput.closeArchiveEntry(); + + } + + try (ZipFile zf = new ZipFile(new SeekableInMemoryByteChannel(zipContent.toByteArray()))) { + ZipArchiveEntry inflatedEntry = zf.getEntry("inflated.txt"); + Assert.assertNotEquals(-1L, inflatedEntry.getLocalHeaderOffset()); + Assert.assertNotEquals(-1L, inflatedEntry.getDataOffset()); + Assert.assertTrue(inflatedEntry.isStreamContiguous()); + Assert.assertNotEquals(-1L, inflatedEntry.getCompressedSize()); + Assert.assertNotEquals(-1L, inflatedEntry.getSize()); + ZipArchiveEntry storedEntry = zf.getEntry("stored.txt"); + Assert.assertNotEquals(-1L, storedEntry.getLocalHeaderOffset()); + Assert.assertNotEquals(-1L, storedEntry.getDataOffset()); + Assert.assertTrue(inflatedEntry.isStreamContiguous()); + Assert.assertNotEquals(-1L, storedEntry.getCompressedSize()); + Assert.assertNotEquals(-1L, storedEntry.getSize()); + } + } + private void assertAllReadMethods(byte[] expected, ZipFile zipFile, ZipArchiveEntry entry) { // simple IOUtil read try (InputStream stream = zf.getInputStream(entry)) { @@ -461,6 +523,12 @@ public class ZipFileTest { return full; } + private long calculateCrc32(byte[] content) { + CRC32 crc = new 
CRC32(); + crc.update(content); + return crc.getValue(); + } + /* * ordertest.zip has been handcrafted. *