Author: centic Date: Mon May 20 14:42:31 2019 New Revision: 1859564 URL: http://svn.apache.org/viewvc?rev=1859564&view=rev Log: FileMagic now has patterns with up to 12 bytes (JPG). Avoid an exception if a very short file is encountered. Add more tests.
Modified: poi/trunk/src/java/org/apache/poi/poifs/filesystem/FileMagic.java poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestFileMagic.java Modified: poi/trunk/src/java/org/apache/poi/poifs/filesystem/FileMagic.java URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/poifs/filesystem/FileMagic.java?rev=1859564&r1=1859563&r2=1859564&view=diff ============================================================================== --- poi/trunk/src/java/org/apache/poi/poifs/filesystem/FileMagic.java (original) +++ poi/trunk/src/java/org/apache/poi/poifs/filesystem/FileMagic.java Mon May 20 14:42:31 2019 @@ -19,13 +19,13 @@ package org.apache.poi.poifs.filesystem; import static org.apache.poi.poifs.common.POIFSConstants.OOXML_FILE_HEADER; import static org.apache.poi.poifs.common.POIFSConstants.RAW_XML_FILE_HEADER; -import static java.nio.charset.StandardCharsets.UTF_8; import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.util.Arrays; import org.apache.poi.poifs.storage.HeaderBlockConstants; import org.apache.poi.util.IOUtils; @@ -98,6 +98,9 @@ public enum FileMagic { /** UNKNOWN magic */ UNKNOWN(new byte[0]); + // update this if a longer pattern is added + final static int MAX_PATTERN_LENGTH = 12; + final byte[][] magic; FileMagic(long magic) { @@ -120,6 +123,12 @@ public enum FileMagic { public static FileMagic valueOf(byte[] magic) { for (FileMagic fm : values()) { for (byte[] ma : fm.magic) { + // don't try to match if the given byte-array is too short + // for this pattern anyway + if(magic.length < ma.length) { + continue; + } + if (findMagic(ma, magic)) { return fm; } @@ -149,7 +158,13 @@ public enum FileMagic { */ public static FileMagic valueOf(final File inp) throws IOException { try (FileInputStream fis = new FileInputStream(inp)) { - final byte[] data = IOUtils.toByteArray(fis, 8); + // read as many bytes as possible, up to the required 
number of bytes + byte[] data = new byte[MAX_PATTERN_LENGTH]; + int read = IOUtils.readFully(fis, data, 0, MAX_PATTERN_LENGTH); + + // only use the bytes that could be read + data = Arrays.copyOf(data, read); + return FileMagic.valueOf(data); } } @@ -173,8 +188,8 @@ public enum FileMagic { throw new IOException("getFileMagic() only operates on streams which support mark(int)"); } - // Grab the first 8 bytes - byte[] data = IOUtils.peekFirst8Bytes(inp); + // Grab the first bytes of this stream + byte[] data = IOUtils.peekFirstNBytes(inp, MAX_PATTERN_LENGTH); return FileMagic.valueOf(data); } Modified: poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestFileMagic.java URL: http://svn.apache.org/viewvc/poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestFileMagic.java?rev=1859564&r1=1859563&r2=1859564&view=diff ============================================================================== --- poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestFileMagic.java (original) +++ poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestFileMagic.java Mon May 20 14:42:31 2019 @@ -22,9 +22,13 @@ import org.apache.poi.POIDataSamples; import org.junit.Test; import java.io.BufferedInputStream; +import java.io.File; import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.util.Arrays; +import java.util.Random; import static org.junit.Assert.*; @@ -43,6 +47,14 @@ public class TestFileMagic { assertEquals(FileMagic.HTML, FileMagic.valueOf("\r\n<html".getBytes(Charsets.UTF_8))); assertEquals(FileMagic.HTML, FileMagic.valueOf("\r<html".getBytes(Charsets.UTF_8))); + assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xDB })); + assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xE0, 'a', 'b', 'J', 'F', 'I', 'F', 0x00, 0x01 })); + assertEquals(FileMagic.JPEG, FileMagic.valueOf(new 
byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xEE })); + assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xE1, 'd', 'c', 'E', 'x', 'i', 'f', 0x00, 0x00 })); + + assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf("something".getBytes(Charsets.UTF_8))); + assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf(new byte[0])); + try { FileMagic.valueOf("some string"); fail("Should catch exception here"); @@ -82,4 +94,81 @@ public class TestFileMagic { assertNotSame(stream, FileMagic.prepareToCheckMagic(stream)); } } + + @Test + public void testMatchingButTooLessData() { + // this matches JPG, but is not long enough, previously this caused an Exception + byte[] data = new byte[] { -1, -40, -1, -32, 0, 16, 74, 70 }; + + assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf(data)); + } + + @Test + public void testShortFile() throws IOException { + // having a file shorter than 8 bytes previously caused an exception + byte[] data = new byte[] { -1, -40, -1, -32, 0 }; + + File file = File.createTempFile("TestFileMagic", ".bin"); + try { + try (FileOutputStream fos = new FileOutputStream(file)) { + fos.write(data); + } + + assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf(file)); + } finally { + assertTrue(file.delete()); + } + } + + @Test(expected = IOException.class) + public void testMarkRequired() throws IOException { + byte[] data = new byte[] { -1, -40, -1, -32, 0 }; + + File file = File.createTempFile("TestFileMagic", ".bin"); + try { + try (FileOutputStream fos = new FileOutputStream(file)) { + fos.write(data); + } + + // a FileInputStream does not support "marking" + try (FileInputStream str = new FileInputStream(file)) { + assertFalse(str.markSupported()); + + FileMagic.valueOf(str); + } + } finally { + assertTrue(file.delete()); + } + } + + @Test + public void testPatterns() { + // just try to trash the functionality with some byte-patterns + for(int i = 0; i < 256;i++) { + final byte[] data = new byte[12]; + for(int 
j = 0;j < 12; j++) { + data[j] = (byte)i; + + assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf(data)); + } + } + } + + @Test + public void testRandomPatterns() { + Random random = new Random(); + + // just try to trash the functionality with some byte-patterns + for(int i = 0; i < 1000;i++) { + final byte[] data = new byte[12]; + random.nextBytes(data); + + // we cannot check for UNKNOWN as we might hit valid byte-patterns here as well + try { + assertNotNull(FileMagic.valueOf(data)); + } catch (Exception e) { + throw new IllegalStateException("Failed with pattern " + Arrays.toString(data), e); + } + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@poi.apache.org For additional commands, e-mail: commits-h...@poi.apache.org