Author: nick
Date: Wed Nov 29 06:49:40 2006
New Revision: 480585

URL: http://svn.apache.org/viewvc?view=rev&rev=480585
Log:
Support compressed pictures properly, from bug #41032

Added:
    jakarta/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.doc   (with props)
    jakarta/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.emf   (with props)
Modified:
    jakarta/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/FIBFieldHandler.java
    jakarta/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/UnhandledDataStructure.java
    jakarta/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java
    jakarta/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFPictures.java

Modified: jakarta/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/FIBFieldHandler.java
URL: http://svn.apache.org/viewvc/jakarta/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/FIBFieldHandler.java?view=diff&rev=480585&r1=480584&r2=480585
==============================================================================
--- jakarta/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/FIBFieldHandler.java (original)
+++ jakarta/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/FIBFieldHandler.java Wed Nov 29 06:49:40 2006
@@ -25,6 +25,8 @@
 import org.apache.poi.hwpf.model.io.HWPFOutputStream;
 
 import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
 
 public class FIBFieldHandler
 {
@@ -122,6 +124,8 @@
   public static final int STTBLISTNAMES = 91;
   public static final int STTBFUSSR = 92;
 
+  private static POILogger log = 
POILogFactory.getLogger(FIBFieldHandler.class);
+
   private static final int FIELD_SIZE = LittleEndian.INT_SIZE * 2;
 
   private HashMap _unknownMap = new HashMap();
@@ -146,9 +150,18 @@
       {
         if (dsSize > 0)
         {
-          UnhandledDataStructure unhandled = new UnhandledDataStructure(
-            tableStream, dsOffset, dsSize);
-          _unknownMap.put(new Integer(x), unhandled);
+          if (dsOffset + dsSize > tableStream.length)
+          {
+            log.log(POILogger.WARN, "Unhandled data structure points to 
outside the buffer. " +
+                                    "offset = " + dsOffset + ", length = " + 
dsSize +
+                                    ", buffer length = " + tableStream.length);
+          }
+          else
+          {
+            UnhandledDataStructure unhandled = new UnhandledDataStructure(
+              tableStream, dsOffset, dsSize);
+            _unknownMap.put(new Integer(x), unhandled);
+          }
         }
       }
       _fields[x*2] = dsOffset;

Modified: jakarta/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/UnhandledDataStructure.java
URL: http://svn.apache.org/viewvc/jakarta/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/UnhandledDataStructure.java?view=diff&rev=480585&r1=480584&r2=480585
==============================================================================
--- jakarta/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/UnhandledDataStructure.java (original)
+++ jakarta/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/UnhandledDataStructure.java Wed Nov 29 06:49:40 2006
@@ -23,7 +23,13 @@
 
   public UnhandledDataStructure(byte[] buf, int offset, int length)
   {
+//    System.out.println("Yes, using my code");
     _buf = new byte[length];
+    if (offset + length > buf.length)
+    {
+      throw new IndexOutOfBoundsException("buffer length is " + buf.length +
+                                          "but code is trying to read " + 
length + " from offset " + offset);
+    }
     System.arraycopy(buf, offset, _buf, 0, length);
   }
 

Modified: jakarta/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java
URL: http://svn.apache.org/viewvc/jakarta/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java?view=diff&rev=480585&r1=480584&r2=480585
==============================================================================
--- jakarta/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java (original)
+++ jakarta/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java Wed Nov 29 06:49:40 2006
@@ -18,9 +18,14 @@
 package org.apache.poi.hwpf.usermodel;
 
 import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.POILogger;
+import org.apache.poi.util.POILogFactory;
 
 import java.io.OutputStream;
 import java.io.IOException;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.util.zip.InflaterInputStream;
 
 /**
  * Represents embedded picture extracted from Word Document
@@ -28,8 +33,11 @@
  */
 public class Picture
 {
+  private static final POILogger log = POILogFactory.getLogger(Picture.class);
+
 //  public static final int FILENAME_OFFSET = 0x7C;
 //  public static final int FILENAME_SIZE_OFFSET = 0x6C;
+  static final int MFPMM_OFFSET = 0x6;
   static final int BLOCK_TYPE_OFFSET = 0xE;
   static final int PICT_HEADER_OFFSET = 0x4;
   static final int UNKNOWN_HEADER_SIZE = 0x49;
@@ -41,13 +49,22 @@
   public static final byte[] TIFF = new byte[]{0x49, 0x49, 0x2A, 0x00};
   public static final byte[] TIFF1 = new byte[]{0x4D, 0x4D, 0x00, 0x2A};
 
+  public static final byte[] EMF = { 0x01, 0x00, 0x00, 0x00 };
+  public static final byte[] WMF1 = { (byte)0xD7, (byte)0xCD, (byte)0xC6, 
(byte)0x9A, 0x00, 0x00 };
+  public static final byte[] WMF2 = { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 }; // 
Windows 3.x
+  // TODO: DIB, PICT
+
   public static final byte[] IHDR = new byte[]{'I', 'H', 'D', 'R'};
 
+  public static final byte[] COMPRESSED1 = { (byte)0xFE, 0x78, (byte)0xDA };
+  public static final byte[] COMPRESSED2 = { (byte)0xFE, 0x78, (byte)0x9C };
+
   private int dataBlockStartOfsset;
   private int pictureBytesStartOffset;
   private int dataBlockSize;
   private int size;
 //  private String fileName;
+  private byte[] rawContent;
   private byte[] content;
   private byte[] _dataStream;
   private int aspectRatioX;
@@ -77,9 +94,12 @@
 
     if (fillBytes)
     {
-      fillImageContent(_dataStream);
+      fillImageContent();
     }
+  }
 
+  private void fillWidthHeight()
+  {
     String ext = suggestFileExtension();
     // trying to extract width and height from pictures content:
     if ("jpg".equalsIgnoreCase(ext)) {
@@ -121,8 +141,8 @@
    */
   public void writeImageContent(OutputStream out) throws IOException
   {
-    if (content!=null && content.length>0) {
-      out.write(content, 0, size);
+    if (rawContent!=null && rawContent.length>0) {
+      out.write(rawContent, 0, size);
     } else {
       out.write(_dataStream, pictureBytesStartOffset, size);
     }
@@ -135,11 +155,20 @@
   {
     if (content == null || content.length<=0)
     {
-      fillImageContent(this._dataStream);
+      fillImageContent();
     }
     return content;
   }
 
+  public byte[] getRawContent()
+  {
+    if (rawContent == null || rawContent.length <= 0)
+    {
+      fillRawImageContent();
+    }
+    return rawContent;
+  }
+
   /**
    *
    * @return size in bytes of the picture
@@ -171,10 +200,12 @@
    */
   public String suggestFileExtension()
   {
-    if (content!=null && content.length>0) {
-      return suggestFileExtension(content, 0);
+    String extension = suggestFileExtension(_dataStream, 
pictureBytesStartOffset);
+    if ("".equals(extension)) {
+      // May be compressed.  Get the uncompressed content and inspect that.
+      extension = suggestFileExtension(getContent(), 0);
     }
-    return suggestFileExtension(_dataStream, pictureBytesStartOffset);
+    return extension;
   }
 
 
@@ -188,11 +219,16 @@
       return "gif";
     } else if (matchSignature(_dataStream, BMP, pictureBytesStartOffset)) {
       return "bmp";
-    } else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset)) {
-      return "tiff";
-    } else if (matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) {
+    } else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset) ||
+               matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) {
       return "tiff";
+    } else if (matchSignature(content, WMF1, 0) ||
+               matchSignature(content, WMF2, 0)) {
+      return "wmf";
+    } else if (matchSignature(content, EMF, 0)) {
+      return "emf";
     }
+    // TODO: DIB, PICT
     return "";
   }
 
@@ -233,10 +269,44 @@
 //        return fileName.trim();
 //    }
 
-  private void fillImageContent(byte[] dataStream)
+  private void fillRawImageContent()
   {
-    this.content = new byte[size];
-    System.arraycopy(dataStream, pictureBytesStartOffset, content, 0, size);
+    this.rawContent = new byte[size];
+    System.arraycopy(_dataStream, pictureBytesStartOffset, rawContent, 0, 
size);
+  }
+
+  private void fillImageContent()
+  {
+    byte[] rawContent = getRawContent();
+
+    // HACK: Detect compressed images.  In reality there should be some way to 
determine
+    //       this from the first 32 bytes, but I can't see any similarity 
between all the
+    //       samples I have obtained, nor any similarity in the data block 
contents.
+    if (matchSignature(rawContent, COMPRESSED1, 32) || 
matchSignature(rawContent, COMPRESSED2, 32))
+    {
+      try
+      {
+        InflaterInputStream in = new InflaterInputStream(
+          new ByteArrayInputStream(rawContent, 33, rawContent.length - 33));
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        byte[] buf = new byte[4096];
+        int readBytes;
+        while ((readBytes = in.read(buf)) > 0)
+        {
+          out.write(buf, 0, readBytes);
+        }
+        content = out.toByteArray();
+      }
+      catch (IOException e)
+      {
+        // Problems reading from the actual ByteArrayInputStream should never 
happen
+        // so this will only ever be a ZipException.
+        log.log(POILogger.INFO, "Possibly corrupt compression or 
non-compressed data", e);
+      }
+    } else {
+      // Raw data is not compressed.
+      content = rawContent;
+    }
   }
 
   private static int getPictureBytesStartOffset(int dataBlockStartOffset, 
byte[] _dataStream, int dataBlockSize)
@@ -322,18 +392,28 @@
       this.height = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH + 4);
     }
   }
+
   /**
    * returns pixel width of the picture or -1 if dimensions determining was 
failed
    */
   public int getWidth()
   {
+    if (width == -1)
+    {
+      fillWidthHeight();
+    }
     return width;
   }
+
   /**
    * returns pixel height of the picture or -1 if dimensions determining was 
failed
    */
   public int getHeight()
   {
+    if (height == -1)
+    {
+      fillWidthHeight();
+    }
     return height;
   }
 

Modified: jakarta/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFPictures.java
URL: http://svn.apache.org/viewvc/jakarta/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFPictures.java?view=diff&rev=480585&r1=480584&r2=480585
==============================================================================
--- jakarta/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFPictures.java (original)
+++ jakarta/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/TestHWPFPictures.java Wed Nov 29 06:49:40 2006
@@ -31,38 +31,40 @@
  * @author nick
  */
 public class TestHWPFPictures extends TestCase {
-       private HWPFDocument docA;
-       private HWPFDocument docB;
        private String docAFile;
        private String docBFile;
+       private String docCFile;
        
        private String imgAFile;
        private String imgBFile;
+       private String imgCFile;
        
        protected void setUp() throws Exception {
                String dirname = System.getProperty("HWPF.testdata.path");
                
                docAFile = dirname + "/testPictures.doc";
                docBFile = dirname + "/two_images.doc";
+               docCFile = dirname + "/vector_image.doc";
                
                imgAFile = dirname + "/simple_image.jpg";
                imgBFile = dirname + "/simple_image.png";
+               imgCFile = dirname + "/vector_image.emf";
        }
        
        /**
         * Test just opening the files
         */
        public void testOpen() throws Exception {
-               docA = new HWPFDocument(new FileInputStream(docAFile));
-               docB = new HWPFDocument(new FileInputStream(docBFile));
+               HWPFDocument docA = new HWPFDocument(new 
FileInputStream(docAFile));
+               HWPFDocument docB = new HWPFDocument(new 
FileInputStream(docBFile));
        }
        
        /**
         * Test that we have the right numbers of images in each file
         */
        public void testImageCount() throws Exception {
-               docA = new HWPFDocument(new FileInputStream(docAFile));
-               docB = new HWPFDocument(new FileInputStream(docBFile));
+               HWPFDocument docA = new HWPFDocument(new 
FileInputStream(docAFile));
+               HWPFDocument docB = new HWPFDocument(new 
FileInputStream(docBFile));
                
                assertNotNull(docA.getPicturesTable());
                assertNotNull(docB.getPicturesTable());
@@ -81,7 +83,7 @@
         * Test that we have the right images in at least one file
         */
        public void testImageData() throws Exception {
-               docB = new HWPFDocument(new FileInputStream(docBFile));
+               HWPFDocument docB = new HWPFDocument(new 
FileInputStream(docBFile));
                PicturesTable picB = docB.getPicturesTable();
                List picturesB = picB.getAllPictures();
                
@@ -102,6 +104,26 @@
 
                assertBytesSame(pic1B, pic1.getContent());
                assertBytesSame(pic2B, pic2.getContent());
+       }
+       
+       /**
+        * Test that compressed image data is correctly returned.
+        */
+       public void testCompressedImageData() throws Exception {
+               HWPFDocument docC = new HWPFDocument(new 
FileInputStream(docCFile));
+               PicturesTable picC = docC.getPicturesTable();
+               List picturesC = picC.getAllPictures();
+               
+               assertEquals(1, picturesC.size());
+               
+               Picture pic = (Picture)picturesC.get(0);
+               assertNotNull(pic);
+               
+               // Check the same
+               byte[] picBytes = readFile(imgCFile);
+               
+               assertEquals(picBytes.length, pic.getContent().length);
+               assertBytesSame(picBytes, pic.getContent());
        }
        
        

Added: jakarta/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.doc
URL: http://svn.apache.org/viewvc/jakarta/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.doc?view=auto&rev=480585
==============================================================================
Binary file - no diff available.

Propchange: jakarta/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.doc
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: jakarta/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.emf
URL: http://svn.apache.org/viewvc/jakarta/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.emf?view=auto&rev=480585
==============================================================================
Binary file - no diff available.

Propchange: jakarta/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/vector_image.emf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream



---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
Mailing List:    http://jakarta.apache.org/site/mail2.html#poi
The Apache Jakarta POI Project: http://jakarta.apache.org/poi/

Reply via email to