Author: jukka Date: Mon Oct 22 12:48:34 2007 New Revision: 587217 URL: http://svn.apache.org/viewvc?rev=587217&view=rev Log: TIKA-84 - Add MimeTypes.getMimeType(InputStream) - Added also getMimeType(String, InputStream) - Extracted common code to readMagicHeader(InputStream) - Javadoc improvements
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java Modified: incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=587217&r1=587216&r2=587217&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java Mon Oct 22 12:48:34 2007 @@ -37,8 +37,15 @@ * This class is a MimeType repository. It gathers a set of MimeTypes and * enables to retrieves a content-type from its name, from a file name, or from * a magic character sequence. - * - * + * <p> + * The MIME type detection methods that take an {@link InputStream} as + * an argument will never reads more than {@link #getMinLength()} bytes + * from the stream. Also the given stream is never + * {@link InputStream#close() closed}, {@link InputStream#mark(int) marked}, + * or {@link InputStream#reset() reset} by the methods. Thus a client can + * use the {@link InputStream#markSupported() mark feature} of the stream + * (if available) to restore the stream back to the state it was before type + * detection if it wants to process the stream based on the detected type. */ public final class MimeTypes { @@ -178,14 +185,13 @@ /** * Returns the MIME type that best matches the given first few bytes - * of a document stream. Returns <code>null</code> if no matching type - * is found. + * of a document stream. * <p> * The given byte array is expected to be at least {@link #getMinLength()} * long, or shorter only if the document stream itself is shorter. 
* * @param data first few bytes of a document stream - * @return matching MIME type, or <code>null</code> + * @return matching MIME type, or <code>null</code> if no match is found */ public MimeType getMimeType(byte[] data) { assert data != null; @@ -211,19 +217,30 @@ /** * Returns the MIME type that best matches the first few bytes of the * given document stream. - * <p> - * If the given stream supports the mark feature (and doesn't throw an - * exception during this method call), then it is safe to use - * <code>stream.mark({@link #getMinLength()})</code> before and - * <code>stream.reset()</code> after this method call to restore the - * stream to the state it was in before this method call. * * @see #getMimeType(byte[]) * @param stream document stream - * @return matching MIME type + * @return matching MIME type, or <code>null</code> if no match is found * @throws IOException if the stream can be read */ public MimeType getMimeType(InputStream stream) throws IOException { + return getMimeType(readMagicHeader(stream)); + } + + /** + * Reads the first {@link #getMinLength()} bytes from the given stream. + * If the stream is shorter, then the entire content of the stream is + * returned. + * <p> + * The given stream is never {@link InputStream#close() closed}, + * {@link InputStream#mark(int) marked}, or + * {@link InputStream#reset() reset} by this method. 
+ * * @param stream stream to be read + * @return first {@link #getMinLength()} (or fewer) bytes of the stream + * @throws IOException if the stream can not be read + */ + private byte[] readMagicHeader(InputStream stream) throws IOException { assert stream != null; byte[] bytes = new byte[getMinLength()]; @@ -233,14 +250,14 @@ while (lastRead != -1) { totalRead += lastRead; if (totalRead == bytes.length) { - return getMimeType(bytes); + return bytes; } lastRead = stream.read(bytes, totalRead, bytes.length - totalRead); } byte[] shorter = new byte[totalRead]; System.arraycopy(bytes, 0, shorter, 0, totalRead); - return getMimeType(shorter); + return shorter; } /** @@ -271,6 +288,21 @@ } return mimeType; + } + + /** + * Returns the MIME type that best matches the given document name and + * the first few bytes of the given document stream. + * + * @see #getMimeType(String, byte[]) + * @param name document name + * @param stream document stream + * @return matching MIME type, or <code>null</code> if no match is found + * @throws IOException if the stream can not be read + */ + public MimeType getMimeType(String name, InputStream stream) + throws IOException { + return getMimeType(name, readMagicHeader(stream)); } /**