Author: jukka Date: Tue Mar 18 17:17:25 2008 New Revision: 638656 URL: http://svn.apache.org/viewvc?rev=638656&view=rev Log: TIKA-131: Lazy XHTML prefix generation
Modified: incubator/tika/trunk/CHANGES.txt incubator/tika/trunk/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java Modified: incubator/tika/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=638656&r1=638655&r2=638656&view=diff ============================================================================== --- incubator/tika/trunk/CHANGES.txt (original) +++ incubator/tika/trunk/CHANGES.txt Tue Mar 18 17:17:25 2008 @@ -29,6 +29,9 @@ 12. TIKA-130 - self-or-descendant axis does not match self in streaming XPath (Jukka Zitting) +13. TIKA-131 - Lazy XHTML prefix generation (Jukka Zitting) + + Release 0.1-incubating - 12/27/2007 1. TIKA-5 - Port Metadata Framework from Nutch (mattmann) Modified: incubator/tika/trunk/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java?rev=638656&r1=638655&r2=638656&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java Tue Mar 18 17:17:25 2008 @@ -17,6 +17,7 @@ package org.apache.tika.sax; import org.apache.tika.metadata.Metadata; +import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; import org.xml.sax.helpers.AttributesImpl; @@ -38,14 +39,29 @@ */ private final Metadata metadata; + /** + * Flag to indicate whether the document element has been started. + */ + private boolean started = false; + public XHTMLContentHandler(ContentHandler handler, Metadata metadata) { super(handler); this.metadata = metadata; } /** - * Starts an XHTML document by setting up the namespace mappings and - * writing following header: + * Starts an XHTML document by setting up the namespace mappings. + * The standard XHTML prefix is generated lazily when the first + * element is started. + */ + @Override + public void startDocument() throws SAXException { + super.startDocument(); + startPrefixMapping("", XHTML); + } + + /** + * Generates the following XHTML prefix when called for the first time: * <pre> * <html> * <head> @@ -54,19 +70,20 @@ * <body> * </pre> */ - public void startDocument() throws SAXException { - super.startDocument(); - startPrefixMapping("", XHTML); - startElement("html"); - startElement("head"); - startElement("title"); - String title = metadata.get(Metadata.TITLE); - if (title != null && title.length() > 0) { - characters(title); + private void lazyStartDocument() throws SAXException { + if (!started) { + started = true; + startElement("html"); + startElement("head"); + startElement("title"); + String title = metadata.get(Metadata.TITLE); + if (title != null && title.length() > 0) { + characters(title); + } + endElement("title"); + endElement("head"); + startElement("body"); } - endElement("title"); - endElement("head"); - startElement("body"); } /** @@ -77,11 +94,21 @@ * </html> * </pre> */ + @Override public void endDocument() throws SAXException { + lazyStartDocument(); endElement("body"); endElement("html"); endPrefixMapping(""); super.endDocument(); + } + + @Override + public void startElement( + String uri, String local, String name, Attributes attributes) + throws SAXException { + lazyStartDocument(); + super.startElement(uri, local, name, attributes); } public void startElement(String name) throws SAXException {