Author: dflorey Date: Sat Dec 31 03:47:45 2005 New Revision: 360272 URL: http://svn.apache.org/viewcvs?rev=360272&view=rev Log: Refactored the PropertyExtractor interface to allow more sophisticated property extraction.
Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java?rev=360272&r1=360271&r2=360272&view=diff ============================================================================== --- jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java (original) +++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java Sat Dec 31 03:47:45 2005 @@ -26,6 +26,9 @@ import java.io.InputStream; import java.util.Map; +import org.apache.slide.content.NodeRevisionDescriptor; +import org.apache.slide.content.NodeRevisionDescriptors; + /** * The AbstractPropertyExtractor class */ @@ -45,7 +48,7 @@ this.namespace = namespace; } - public abstract Map extract(InputStream content) throws ExtractorException; + public abstract Map extract(NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor, InputStream content) throws ExtractorException; /* (non-Javadoc) * @see org.apache.slide.extractor.Extractor#getContentType() Modified: 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java?rev=360272&r1=360271&r2=360272&view=diff ============================================================================== --- jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java (original) +++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/ExtractorManager.java Sat Dec 31 03:47:45 2005 @@ -28,6 +28,7 @@ import java.util.Enumeration; import java.util.Iterator; import java.util.List; + import org.apache.slide.content.NodeRevisionDescriptor; import org.apache.slide.content.NodeRevisionDescriptors; import org.apache.slide.util.conf.Configurable; Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java?rev=360272&r1=360271&r2=360272&view=diff ============================================================================== --- jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java (original) +++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSExcelExtractor.java Sat Dec 31 03:47:45 2005 @@ -29,6 +29,7 @@ import java.io.InputStream; import java.io.Reader; import java.util.Iterator; + import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java?rev=360272&r1=360271&r2=360272&view=diff ============================================================================== --- jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java (original) +++ 
jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java Sat Dec 31 03:47:45 2005 @@ -29,6 +29,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; + import org.apache.poi.poifs.eventfilesystem.POIFSReader; import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent; import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener; Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java?rev=360272&r1=360271&r2=360272&view=diff ============================================================================== --- jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java (original) +++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/MSWordExtractor.java Sat Dec 31 03:47:45 2005 @@ -27,6 +27,7 @@ import java.io.InputStream; import java.io.Reader; import java.io.StringReader; + import org.textmining.text.extraction.WordExtractor; /** Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java?rev=360272&r1=360271&r2=360272&view=diff ============================================================================== --- jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java (original) +++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/OfficeExtractor.java Sat Dec 31 03:47:45 2005 @@ -7,6 +7,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; + import org.apache.poi.hpsf.NoPropertySetStreamException; import org.apache.poi.hpsf.Property; import org.apache.poi.hpsf.PropertySet; @@ -16,6 +17,8 @@ import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent; import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener; import 
org.apache.slide.common.PropertyName; +import org.apache.slide.content.NodeRevisionDescriptor; +import org.apache.slide.content.NodeRevisionDescriptors; import org.apache.slide.util.conf.Configurable; import org.apache.slide.util.conf.Configuration; import org.apache.slide.util.conf.ConfigurationException; @@ -100,7 +103,7 @@ super(uri, contentType, namespace); } - public Map extract(InputStream content) throws ExtractorException { + public Map extract(NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor, InputStream content) throws ExtractorException { OfficePropertiesListener listener = new OfficePropertiesListener(); try { POIFSReader r = new POIFSReader(); Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java?rev=360272&r1=360271&r2=360272&view=diff ============================================================================== --- jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java (original) +++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/PDFExtractor.java Sat Dec 31 03:47:45 2005 @@ -28,6 +28,7 @@ import java.io.FileInputStream; import java.io.InputStream; import java.io.Reader; + import org.pdfbox.pdfparser.PDFParser; import org.pdfbox.pdmodel.PDDocument; import org.pdfbox.util.PDFTextStripper; Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java?rev=360272&r1=360271&r2=360272&view=diff ============================================================================== --- jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java (original) +++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractor.java Sat Dec 31 03:47:45 2005 @@ -26,6 +26,9 @@ import java.io.InputStream; import 
java.util.Map; +import org.apache.slide.content.NodeRevisionDescriptor; +import org.apache.slide.content.NodeRevisionDescriptors; + /** * The PropertyExtractor interface * @@ -36,5 +39,5 @@ * Gets extracted property value from the resource, for example "author" * for a word doc, ... */ - public Map extract(InputStream content) throws ExtractorException; + public Map extract(NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor, InputStream content) throws ExtractorException; } Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java?rev=360272&r1=360271&r2=360272&view=diff ============================================================================== --- jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java (original) +++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java Sat Dec 31 03:47:45 2005 @@ -54,7 +54,7 @@ if ( content != null && descriptor != null ) { List extractor = ExtractorManager.getInstance().getPropertyExtractors(namespaceName, descriptors, descriptor); for ( int i = 0, l = extractor.size(); i < l; i++ ) { - Map extractedProperties = ((PropertyExtractor)extractor.get(i)).extract(new ByteArrayInputStream(content.getContentBytes())); + Map extractedProperties = ((PropertyExtractor)extractor.get(i)).extract(descriptors, descriptor, new ByteArrayInputStream(content.getContentBytes())); for ( Iterator j = extractedProperties.entrySet().iterator(); j.hasNext(); ) { Map.Entry entry = (Map.Entry) j.next(); final Object key = entry.getKey(); Modified: jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java URL: http://svn.apache.org/viewcvs/jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java?rev=360272&r1=360271&r2=360272&view=diff 
============================================================================== --- jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java (original) +++ jakarta/slide/trunk/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java Sat Dec 31 03:47:45 2005 @@ -31,7 +31,10 @@ import java.util.Iterator; import java.util.List; import java.util.Map; + import org.apache.slide.common.PropertyName; +import org.apache.slide.content.NodeRevisionDescriptor; +import org.apache.slide.content.NodeRevisionDescriptors; import org.apache.slide.util.conf.Configurable; import org.apache.slide.util.conf.Configuration; import org.apache.slide.util.conf.ConfigurationException; @@ -93,7 +96,7 @@ super(uri, contentType, namespace); } - public Map extract(InputStream content) throws ExtractorException { + public Map extract(NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor, InputStream content) throws ExtractorException { Map properties = new HashMap(); try { SAXBuilder saxBuilder = new SAXBuilder(); --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]