Author: dflorey
Date: Tue Feb 14 01:28:53 2006
New Revision: 377676

URL: http://svn.apache.org/viewcvs?rev=377676&view=rev
Log:
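Fixed a bug so that many MS PowerPoint documents can be indexed concurrently:
the output buffer is now created locally inside each extract() call instead of
being kept as an instance field of the extractor.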

Modified:
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java

Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java?rev=377676&r1=377675&r2=377676&view=diff
==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java (original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java Tue Feb 14 01:28:53 2006
@@ -39,84 +39,67 @@
 /**
  * Content extractor for Microsoft Powerpoint documents.
  */
-public class MSPowerPointExtractor extends AbstractContentExtractor implements 
POIFSReaderListener{
+public class MSPowerPointExtractor extends AbstractContentExtractor {
 
-    static final String CONTENT_TYPE_POWERPOINT_1 = "application/mspowerpoint";
-    static final String CONTENT_TYPE_POWERPOINT_2 = 
"application/vnd.ms-powerpoint";
-    static final String CONTENT_TYPE_POWERPOINT_ALL_CSV = 
CONTENT_TYPE_POWERPOINT_1+","+CONTENT_TYPE_POWERPOINT_2;
-       
-    private ByteArrayOutputStream writer = new ByteArrayOutputStream();
-
-    public MSPowerPointExtractor(String uri, String contentType, String 
namespace) {
-        super(uri, contentType, namespace);
-    }
-
-    public Reader extract(InputStream content)  throws ExtractorException {
-        try {
-            POIFSReader reader = new POIFSReader();
-            reader.registerListener(this);
-            reader.read(content);
-
-            return new InputStreamReader(new 
ByteArrayInputStream(writer.toByteArray()));
-        }
-        catch(Exception e) {
-                throw new ExtractorException(e.getMessage());
-        }
-    }
-
-    public void processPOIFSReaderEvent(POIFSReaderEvent event)
-    {
-        try{
-            if(!event.getName().equalsIgnoreCase("PowerPoint Document"))
-                return;
-
-            DocumentInputStream input = event.getStream();
-
-            byte[] buffer = new byte[input.available()];
-            input.read(buffer, 0, input.available());
-
-            for(int i=0; i<buffer.length-20; i++)
-            {
-                long type = LittleEndian.getUShort(buffer,i+2);
-                long size = LittleEndian.getUInt(buffer,i+4);
-
-                if(type==4008)
-                {
-                    writer.write(buffer, i + 4 + 1, (int) size +3);
-                    i = i + 4 + 1 + (int) size - 1;
-
-                }
-            }
-        }
-        catch (Exception e)
-        {
-
-        }
-    }
-
-    public static void main(String[] args) throws Exception
-    {
-        FileInputStream in = new FileInputStream(args[0]);
-
-        MSPowerPointExtractor ex = new MSPowerPointExtractor(null, null, null);
-
-        Reader reader = ex.extract(in);
-
-        int c;
-        do
-        {
-            c = reader.read();
-
-            System.out.print((char)c);
-        }
-        while( c != -1 );
-    }
-    
-       /* (non-Javadoc)
+       static final String CONTENT_TYPE_POWERPOINT_1 = 
"application/mspowerpoint";
+
+       static final String CONTENT_TYPE_POWERPOINT_2 = 
"application/vnd.ms-powerpoint";
+
+       static final String CONTENT_TYPE_POWERPOINT_ALL_CSV = 
CONTENT_TYPE_POWERPOINT_1
+                       + "," + CONTENT_TYPE_POWERPOINT_2;
+
+       public MSPowerPointExtractor(String uri, String contentType,
+                       String namespace) {
+               super(uri, contentType, namespace);
+       }
+
+       public Reader extract(InputStream content) throws ExtractorException {
+               try {
+                       final ByteArrayOutputStream writer = new 
ByteArrayOutputStream();
+
+                       POIFSReader reader = new POIFSReader();
+                       reader.registerListener(new POIFSReaderListener() {
+                               public void 
processPOIFSReaderEvent(POIFSReaderEvent event) {
+                                       try {
+                                               if 
(!event.getName().equalsIgnoreCase(
+                                                               "PowerPoint 
Document"))
+                                                       return;
+
+                                               DocumentInputStream input = 
event.getStream();
+
+                                               byte[] buffer = new 
byte[input.available()];
+                                               input.read(buffer, 0, 
input.available());
+
+                                               for (int i = 0; i < 
buffer.length - 20; i++) {
+                                                       long type = 
LittleEndian.getUShort(buffer, i + 2);
+                                                       long size = 
LittleEndian.getUInt(buffer, i + 4);
+
+                                                       if (type == 4008) {
+                                                               
writer.write(buffer, i + 4 + 1, (int) size + 3);
+                                                               i = i + 4 + 1 + 
(int) size - 1;
+                                                       }
+                                               }
+                                       } catch (Exception e) {
+
+                                       }
+                               }
+                       });
+                       reader.read(content);
+
+                       return new InputStreamReader(new 
ByteArrayInputStream(writer
+                                       .toByteArray()));
+               } catch (Exception e) {
+                       throw new ExtractorException(e.getMessage());
+               }
+       }
+
+       /*
+        * (non-Javadoc)
+        * 
         * @see org.apache.slide.extractor.Extractor#getContentType()
         */
        public String getContentType() {
-               if(super.getContentType()==null){
+               if (super.getContentType() == null) {
                        return CONTENT_TYPE_POWERPOINT_ALL_CSV;
                }
                return super.getContentType();



---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to