Author: dflorey
Date: Sat Dec 31 03:47:45 2005
New Revision: 360272

URL: http://svn.apache.org/viewcvs?rev=360272&view=rev
Log:
Refactored the PropertyExtractor interface to allow more sophisticated
property extraction.

Modified:
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java
    jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java

Modified: 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java
URL: 
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java
 (original)
+++ 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java
 Sat Dec 31 03:47:45 2005
@@ -26,6 +26,9 @@
 import java.io.InputStream;
 import java.util.Map;
 
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionDescriptors;
+
 /**
  * The AbstractPropertyExtractor class
  */
@@ -45,7 +48,7 @@
         this.namespace = namespace;
     }
 
-    public abstract Map extract(InputStream content) throws ExtractorException;
+    public abstract Map extract(NodeRevisionDescriptors descriptors, 
NodeRevisionDescriptor descriptor, InputStream content) throws 
ExtractorException;
 
     /* (non-Javadoc)
      * @see org.apache.slide.extractor.Extractor#getContentType()

Modified: 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java
URL: 
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java 
(original)
+++ 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java 
Sat Dec 31 03:47:45 2005
@@ -28,6 +28,7 @@
 import java.util.Enumeration;
 import java.util.Iterator;
 import java.util.List;
+
 import org.apache.slide.content.NodeRevisionDescriptor;
 import org.apache.slide.content.NodeRevisionDescriptors;
 import org.apache.slide.util.conf.Configurable;

Modified: 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java
URL: 
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java 
(original)
+++ 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java 
Sat Dec 31 03:47:45 2005
@@ -29,6 +29,7 @@
 import java.io.InputStream;
 import java.io.Reader;
 import java.util.Iterator;
+
 import org.apache.poi.hssf.usermodel.HSSFCell;
 import org.apache.poi.hssf.usermodel.HSSFRow;
 import org.apache.poi.hssf.usermodel.HSSFSheet;

Modified: 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
URL: 
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
 (original)
+++ 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
 Sat Dec 31 03:47:45 2005
@@ -29,6 +29,7 @@
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
+
 import org.apache.poi.poifs.eventfilesystem.POIFSReader;
 import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
 import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;

Modified: 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java
URL: 
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java 
(original)
+++ 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java 
Sat Dec 31 03:47:45 2005
@@ -27,6 +27,7 @@
 import java.io.InputStream;
 import java.io.Reader;
 import java.io.StringReader;
+
 import org.textmining.text.extraction.WordExtractor;
 
 /**

Modified: 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java
URL: 
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java 
(original)
+++ 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java 
Sat Dec 31 03:47:45 2005
@@ -7,6 +7,7 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+
 import org.apache.poi.hpsf.NoPropertySetStreamException;
 import org.apache.poi.hpsf.Property;
 import org.apache.poi.hpsf.PropertySet;
@@ -16,6 +17,8 @@
 import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
 import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
 import org.apache.slide.common.PropertyName;
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionDescriptors;
 import org.apache.slide.util.conf.Configurable;
 import org.apache.slide.util.conf.Configuration;
 import org.apache.slide.util.conf.ConfigurationException;
@@ -100,7 +103,7 @@
                super(uri, contentType, namespace);
        }
 
-       public Map extract(InputStream content) throws ExtractorException {
+       public Map extract(NodeRevisionDescriptors descriptors, 
NodeRevisionDescriptor descriptor, InputStream content) throws 
ExtractorException {
                OfficePropertiesListener listener = new 
OfficePropertiesListener();
                try {
                        POIFSReader r = new POIFSReader();

Modified: 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java
URL: 
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java 
(original)
+++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java 
Sat Dec 31 03:47:45 2005
@@ -28,6 +28,7 @@
 import java.io.FileInputStream;
 import java.io.InputStream;
 import java.io.Reader;
+
 import org.pdfbox.pdfparser.PDFParser;
 import org.pdfbox.pdmodel.PDDocument;
 import org.pdfbox.util.PDFTextStripper;

Modified: 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java
URL: 
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java 
(original)
+++ 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java 
Sat Dec 31 03:47:45 2005
@@ -26,6 +26,9 @@
 import java.io.InputStream;
 import java.util.Map;
 
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionDescriptors;
+
 /**
  * The PropertyExtractor interface
  * 
@@ -36,5 +39,5 @@
      *  Gets extracted property value from the resource, for example "author"
      *  for a word doc, ...
      */
-    public Map extract(InputStream content) throws ExtractorException;
+    public Map extract(NodeRevisionDescriptors descriptors, 
NodeRevisionDescriptor descriptor, InputStream content) throws 
ExtractorException;
 }

Modified: 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java
URL: 
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java
 (original)
+++ 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java
 Sat Dec 31 03:47:45 2005
@@ -54,7 +54,7 @@
             if ( content != null && descriptor != null ) {
                 List extractor = 
ExtractorManager.getInstance().getPropertyExtractors(namespaceName, 
descriptors, descriptor);
                 for ( int i = 0, l = extractor.size(); i < l; i++ ) {
-                    Map extractedProperties = 
((PropertyExtractor)extractor.get(i)).extract(new 
ByteArrayInputStream(content.getContentBytes()));
+                    Map extractedProperties = 
((PropertyExtractor)extractor.get(i)).extract(descriptors, descriptor, new 
ByteArrayInputStream(content.getContentBytes()));
                     for ( Iterator j = 
extractedProperties.entrySet().iterator(); j.hasNext(); ) {
                         Map.Entry entry = (Map.Entry) j.next();
                         final Object key = entry.getKey();

Modified: 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java
URL: 
http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java?rev=360272&r1=360271&r2=360272&view=diff
==============================================================================
--- 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java
 (original)
+++ 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java
 Sat Dec 31 03:47:45 2005
@@ -31,7 +31,10 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+
 import org.apache.slide.common.PropertyName;
+import org.apache.slide.content.NodeRevisionDescriptor;
+import org.apache.slide.content.NodeRevisionDescriptors;
 import org.apache.slide.util.conf.Configurable;
 import org.apache.slide.util.conf.Configuration;
 import org.apache.slide.util.conf.ConfigurationException;
@@ -93,7 +96,7 @@
         super(uri, contentType, namespace);
     }
 
-    public Map extract(InputStream content) throws ExtractorException {
+    public Map extract(NodeRevisionDescriptors descriptors, 
NodeRevisionDescriptor descriptor, InputStream content) throws 
ExtractorException {
         Map properties = new HashMap();
         try {
             SAXBuilder saxBuilder = new SAXBuilder();



---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to