Author: jukka Date: Fri Oct 19 15:27:58 2007 New Revision: 586632 URL: http://svn.apache.org/viewvc?rev=586632&view=rev Log: TIKA-84 - Add MimeTypes.getMimeType(InputStream)
Modified: incubator/tika/trunk/CHANGES.txt incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java Modified: incubator/tika/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=586632&r1=586631&r2=586632&view=diff ============================================================================== --- incubator/tika/trunk/CHANGES.txt (original) +++ incubator/tika/trunk/CHANGES.txt Fri Oct 19 15:27:58 2007 @@ -121,3 +121,5 @@ 54. TIKA-71 - Remove ParserConfig and ParserFactory (jukka) 55. TIKA-83 - Create a org.apache.tika.sax package for SAX utilities (jukka) + +56. TIKA-84 - Add MimeTypes.getMimeType(InputStream) (jukka) Modified: incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=586632&r1=586631&r2=586632&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java Fri Oct 19 15:27:58 2007 @@ -18,6 +18,8 @@ // JDK imports import java.io.File; +import java.io.IOException; +import java.io.InputStream; import java.net.URL; import java.util.Arrays; import java.util.Map; @@ -175,26 +177,18 @@ } /** - * Find the Mime Content Type of a stream from its content. - * - * @param data - * are the first bytes of data of the content to analyze. - * Depending on the length of provided data, all known MimeTypes - * are checked. If the length of provided data is greater or - * egals to the value returned by {@link #getMinLength()}, then - * all known MimeTypes are checked, otherwise only the MimeTypes - * that could be analyzed with the length of provided data are - * analyzed. - * - * @return The Mime Content Type found for the specified data, or - * <code>null</code> if none is found. 
- * @see #getMinLength() + * Returns the MIME type that best matches the given first few bytes + * of a document stream. Returns <code>null</code> if no matching type + * is found. + * <p> + * The given byte array is expected to be at least {@link #getMinLength()} + * long, or shorter only if the document stream itself is shorter. + * + * @param data first few bytes of a document stream + * @return matching MIME type, or <code>null</code> */ public MimeType getMimeType(byte[] data) { - // Preliminary checks - if ((data == null) || (data.length < 1)) { - return null; - } + assert data != null; // First, check for XML descriptions (level by level) for (MimeInfo info : xmls) { @@ -212,6 +206,41 @@ } return null; + } + + /** + * Returns the MIME type that best matches the first few bytes of the + * given document stream. + * <p> + * If the given stream supports the mark feature (and doesn't throw an + * exception during this method call), then it is safe to use + * <code>stream.mark({@link #getMinLength()})</code> before and + * <code>stream.reset()</code> after this method call to restore the + * stream to the state it was in before this method call. + * + * @see #getMimeType(byte[]) + * @param stream document stream + * @return matching MIME type + * @throws IOException if the stream can be read + */ + public MimeType getMimeType(InputStream stream) throws IOException { + assert stream != null; + + byte[] bytes = new byte[getMinLength()]; + int totalRead = 0; + + int lastRead = stream.read(bytes); + while (lastRead != -1) { + totalRead += lastRead; + if (totalRead == bytes.length) { + return getMimeType(bytes); + } + lastRead = stream.read(bytes, totalRead, bytes.length - totalRead); + } + + byte[] shorter = new byte[totalRead]; + System.arraycopy(bytes, 0, shorter, 0, totalRead); + return getMimeType(shorter); } /**