Author: nick
Date: Tue Aug 30 14:59:56 2011
New Revision: 1163248
URL: http://svn.apache.org/viewvc?rev=1163248&view=rev
Log:
TIKA-700 Upgrade the POI dependency to 3.8 Beta 4
Modified:
tika/trunk/tika-parsers/pom.xml
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
Modified: tika/trunk/tika-parsers/pom.xml
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/pom.xml?rev=1163248&r1=1163247&r2=1163248&view=diff
==============================================================================
--- tika/trunk/tika-parsers/pom.xml (original)
+++ tika/trunk/tika-parsers/pom.xml Tue Aug 30 14:59:56 2011
@@ -35,7 +35,7 @@
<url>http://tika.apache.org/</url>
<properties>
- <poi.version>3.8-beta3</poi.version>
+ <poi.version>3.8-beta4</poi.version>
</properties>
<dependencies>
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java?rev=1163248&r1=1163247&r2=1163248&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
Tue Aug 30 14:59:56 2011
@@ -26,8 +26,8 @@ import org.apache.poi.extractor.Extracto
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.xslf.XSLFSlideShow;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
@@ -78,7 +78,7 @@ public class OOXMLExtractorFactory {
"Expecting UserModel based POI OOXML extractor with a
document, but none found. " +
"The extractor returned was a " + poiExtractor
);
- } else if (document instanceof XSLFSlideShow) {
+ } else if (document instanceof XMLSlideShow) {
extractor = new XSLFPowerPointExtractorDecorator(
context, (XSLFPowerPointExtractor) poiExtractor);
} else if (document instanceof XWPFDocument) {
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java?rev=1163248&r1=1163247&r2=1163248&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
Tue Aug 30 14:59:56 2011
@@ -28,10 +28,11 @@ import org.apache.poi.openxml4j.opc.Pack
import org.apache.poi.openxml4j.opc.TargetMode;
import org.apache.poi.xslf.XSLFSlideShow;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.usermodel.DrawingParagraph;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
-import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFCommonSlideData;
-import org.apache.poi.xslf.usermodel.DrawingParagraph;
+import org.apache.poi.xslf.usermodel.XSLFRelation;
+import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.tika.exception.TikaException;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
@@ -39,7 +40,6 @@ import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList;
import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
import org.xml.sax.SAXException;
@@ -55,20 +55,34 @@ public class XSLFPowerPointExtractorDeco
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException,
XmlException, IOException {
- XSLFSlideShow slideShow = (XSLFSlideShow) extractor.getDocument();
- XMLSlideShow xmlSlideShow = new XMLSlideShow(slideShow);
+ XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument();
+ XSLFSlideShow rawSlideShow = null;
+ try {
+ rawSlideShow = slideShow._getXSLFSlideShow(); // TODO Avoid this in future
+ } catch(Exception e) {
+ throw new IOException(e);
+ }
- XSLFSlide[] slides = xmlSlideShow.getSlides();
+ XSLFSlide[] slides = slideShow.getSlides();
for (XSLFSlide slide : slides) {
- CTSlideIdListEntry slideId = slide._getCTSlideId();
-
- CTNotesSlide notes = xmlSlideShow._getXSLFSlideShow().getNotes(
- slideId);
- CTCommentList comments = xmlSlideShow._getXSLFSlideShow()
- .getSlideComments(slideId);
+ // Find the ID, until we ditch the raw slideshow
+ CTSlideIdListEntry slideId = null;
+ for(CTSlideIdListEntry id : rawSlideShow.getSlideReferences().getSldIdList()) {
+ if(rawSlideShow.getSlidePart(id).getPartName().equals(slide.getPackagePart().getPartName())) {
+ slideId = id;
+ }
+ }
+ if(slideId == null) {
+ // This shouldn't normally happen
+ continue;
+ }
+
+ CTNotesSlide notes = rawSlideShow.getNotes(slideId);
+ CTCommentList comments = rawSlideShow.getSlideComments(slideId);
xhtml.startElement("div");
- extractShapeContent(slide.getCommonSlideData(), xhtml);
+ XSLFCommonSlideData common = new XSLFCommonSlideData(slide.getXmlObject().getCSld());
+ extractShapeContent(common, xhtml);
if (comments != null) {
for (CTComment comment : comments.getCmArray()) {
@@ -97,7 +111,13 @@ public class XSLFPowerPointExtractorDeco
@Override
protected List<PackagePart> getMainDocumentParts() throws TikaException {
List<PackagePart> parts = new ArrayList<PackagePart>();
- XSLFSlideShow document = (XSLFSlideShow) extractor.getDocument();
+ XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument();
+ XSLFSlideShow document = null;
+ try {
+ document = slideShow._getXSLFSlideShow(); // TODO Avoid this in future
+ } catch(Exception e) {
+ throw new TikaException(e.getMessage());
+ }
for (CTSlideIdListEntry ctSlide : document.getSlideReferences().getSldIdList()) {
// Add the slide
@@ -113,9 +133,7 @@ public class XSLFPowerPointExtractorDeco
// If it has drawings, return those too
try {
- // TODO Improve when we upgrade POI
-// for(PackageRelationship rel : slidePart.getRelationshipsByType(XSLFRelation.VML_DRAWING.getRelation())) {
- for(PackageRelationship rel : slidePart.getRelationshipsByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/vmlDrawing")) {
+ for(PackageRelationship rel : slidePart.getRelationshipsByType(XSLFRelation.VML_DRAWING.getRelation())) {
if(rel.getTargetMode() == TargetMode.INTERNAL) {
PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
parts.add( rel.getPackage().getPart(relName) );