Author: jukka
Date: Mon Oct 22 12:48:34 2007
New Revision: 587217

URL: http://svn.apache.org/viewvc?rev=587217&view=rev
Log:
TIKA-84 - Add MimeTypes.getMimeType(InputStream)
    - Added also getMimeType(String, InputStream)
    - Extracted common code to readMagicHeader(InputStream)
    - Javadoc improvements

Modified:
    incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=587217&r1=587216&r2=587217&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/mime/MimeTypes.java Mon Oct 22 12:48:34 2007
@@ -37,8 +37,15 @@
  * This class is a MimeType repository. It gathers a set of MimeTypes and
  * enables to retrieves a content-type from its name, from a file name, or from
  * a magic character sequence.
- * 
- * 
+ * <p>
+ * The MIME type detection methods that take an {@link InputStream} as
+ * an argument will never read more than {@link #getMinLength()} bytes
+ * from the stream. Also the given stream is never
+ * {@link InputStream#close() closed}, {@link InputStream#mark(int) marked},
+ * or {@link InputStream#reset() reset} by the methods. Thus a client can
+ * use the {@link InputStream#markSupported() mark feature} of the stream
+ * (if available) to restore the stream back to the state it was before type
+ * detection if it wants to process the stream based on the detected type.
  */
 public final class MimeTypes {
 
@@ -178,14 +185,13 @@
 
     /**
      * Returns the MIME type that best matches the given first few bytes
-     * of a document stream. Returns <code>null</code> if no matching type
-     * is found.
+     * of a document stream.
      * <p>
      * The given byte array is expected to be at least {@link #getMinLength()}
      * long, or shorter only if the document stream itself is shorter.
      *
      * @param data first few bytes of a document stream
-     * @return matching MIME type, or <code>null</code>
+     * @return matching MIME type, or <code>null</code> if no match is found
      */
     public MimeType getMimeType(byte[] data) {
         assert data != null;
@@ -211,19 +217,30 @@
     /**
      * Returns the MIME type that best matches the first few bytes of the
      * given document stream.
-     * <p>
-     * If the given stream supports the mark feature (and doesn't throw an
-     * exception during this method call), then it is safe to use
-     * <code>stream.mark({@link #getMinLength()})</code> before and
-     * <code>stream.reset()</code> after this method call to restore the
-     * stream to the state it was in before this method call.
      *
      * @see #getMimeType(byte[])
      * @param stream document stream
-     * @return matching MIME type
+     * @return matching MIME type, or <code>null</code> if no match is found
      * @throws IOException if the stream can not be read
      */
     public MimeType getMimeType(InputStream stream) throws IOException {
+        return getMimeType(readMagicHeader(stream));
+    }
+
+    /**
+     * Reads the first {@link #getMinLength()} bytes from the given stream.
+     * If the stream is shorter, then the entire content of the stream is
+     * returned.
+     * <p>
+     * The given stream is never {@link InputStream#close() closed},
+     * {@link InputStream#mark(int) marked}, or
+     * {@link InputStream#reset() reset} by this method.
+     *
+     * @param stream stream to be read
+     * @return first {@link #getMinLength()} (or fewer) bytes of the stream
+     * @throws IOException if the stream can not be read
+     */
+    private byte[] readMagicHeader(InputStream stream) throws IOException {
         assert stream != null;
 
         byte[] bytes = new byte[getMinLength()];
@@ -233,14 +250,14 @@
         while (lastRead != -1) {
             totalRead += lastRead;
             if (totalRead == bytes.length) {
-                return getMimeType(bytes);
+                return bytes;
             }
             lastRead = stream.read(bytes, totalRead, bytes.length - totalRead);
         }
 
         byte[] shorter = new byte[totalRead];
         System.arraycopy(bytes, 0, shorter, 0, totalRead);
-        return getMimeType(shorter);
+        return shorter;
     }
 
     /**
@@ -271,6 +288,21 @@
         }
 
         return mimeType;
+    }
+
+    /**
+     * Returns the MIME type that best matches the given document name and
+     * the first few bytes of the given document stream.
+     *
+     * @see #getMimeType(String, byte[])
+     * @param name document name
+     * @param stream document stream
+     * @return matching MIME type, or <code>null</code> if no match is found
+     * @throws IOException if the stream can not be read
+     */
+    public MimeType getMimeType(String name, InputStream stream)
+            throws IOException {
+        return getMimeType(name, readMagicHeader(stream));
     }
 
     /**


Reply via email to