Author: jukka
Date: Fri Oct 19 15:27:58 2007
New Revision: 586632

URL: http://svn.apache.org/viewvc?rev=586632&view=rev
Log:
TIKA-84 - Add MimeTypes.getMimeType(InputStream)

Modified:
    incubator/tika/trunk/CHANGES.txt
    incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java

Modified: incubator/tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=586632&r1=586631&r2=586632&view=diff
==============================================================================
--- incubator/tika/trunk/CHANGES.txt (original)
+++ incubator/tika/trunk/CHANGES.txt Fri Oct 19 15:27:58 2007
@@ -121,3 +121,5 @@
 54. TIKA-71 - Remove ParserConfig and ParserFactory (jukka)
 
 55. TIKA-83 - Create a org.apache.tika.sax package for SAX utilities (jukka)
+
+56. TIKA-84 - Add MimeTypes.getMimeType(InputStream) (jukka)

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=586632&r1=586631&r2=586632&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java Fri Oct 19 15:27:58 2007
@@ -18,6 +18,8 @@
 
 // JDK imports
 import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
 import java.net.URL;
 import java.util.Arrays;
 import java.util.Map;
@@ -175,26 +177,18 @@
     }
 
     /**
-     * Find the Mime Content Type of a stream from its content.
-     * 
-     * @param data
-     *            are the first bytes of data of the content to analyze.
-     *            Depending on the length of provided data, all known MimeTypes
-     *            are checked. If the length of provided data is greater or
-     *            egals to the value returned by {@link #getMinLength()}, then
-     *            all known MimeTypes are checked, otherwise only the MimeTypes
-     *            that could be analyzed with the length of provided data are
-     *            analyzed.
-     * 
-     * @return The Mime Content Type found for the specified data, or
-     *         <code>null</code> if none is found.
-     * @see #getMinLength()
+     * Returns the MIME type that best matches the given first few bytes
+     * of a document stream. Returns <code>null</code> if no matching type
+     * is found.
+     * <p>
+     * The given byte array is expected to be at least {@link #getMinLength()}
+     * long, or shorter only if the document stream itself is shorter.
+     *
+     * @param data first few bytes of a document stream
+     * @return matching MIME type, or <code>null</code>
      */
     public MimeType getMimeType(byte[] data) {
-        // Preliminary checks
-        if ((data == null) || (data.length < 1)) {
-            return null;
-        }
+        assert data != null;
 
         // First, check for XML descriptions (level by level)
         for (MimeInfo info : xmls) {
@@ -212,6 +206,41 @@
         }
 
         return null;
+    }
+
+    /**
+     * Returns the MIME type that best matches the first few bytes of the
+     * given document stream.
+     * <p>
+     * If the given stream supports the mark feature (and doesn't throw an
+     * exception during this method call), then it is safe to use
+     * <code>stream.mark({@link #getMinLength()})</code> before and
+     * <code>stream.reset()</code> after this method call to restore the
+     * stream to the state it was in before this method call.
+     *
+     * @see #getMimeType(byte[])
+     * @param stream document stream
+     * @return matching MIME type
+     * @throws IOException if the stream can be read
+     */
+    public MimeType getMimeType(InputStream stream) throws IOException {
+        assert stream != null;
+
+        byte[] bytes = new byte[getMinLength()];
+        int totalRead = 0;
+
+        int lastRead = stream.read(bytes);
+        while (lastRead != -1) {
+            totalRead += lastRead;
+            if (totalRead == bytes.length) {
+                return getMimeType(bytes);
+            }
+            lastRead = stream.read(bytes, totalRead, bytes.length - totalRead);
+        }
+
+        byte[] shorter = new byte[totalRead];
+        System.arraycopy(bytes, 0, shorter, 0, totalRead);
+        return getMimeType(shorter);
     }
 
     /**


Reply via email to