Author: lryan
Date: Fri Nov 7 14:11:30 2008
New Revision: 712290
URL: http://svn.apache.org/viewvc?rev=712290&view=rev
Log:
Workaround for an issue in the Neko parser, which incorrectly reports the
location of certain content. See
https://sourceforge.net/tracker2/?func=detail&aid=2236681&group_id=195122&atid=952178
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java?rev=712290&r1=712289&r2=712290&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
Fri Nov 7 14:11:30 2008
@@ -17,13 +17,23 @@
*/
package org.apache.shindig.gadgets.parse.nekohtml;
-import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Lists;
-import com.google.inject.Inject;
import org.apache.shindig.gadgets.parse.DomUtil;
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
import org.apache.shindig.gadgets.parse.HtmlSerializer;
-import org.apache.xerces.xni.*;
+
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
+import com.google.inject.Inject;
+
+import org.apache.xerces.xni.Augmentations;
+import org.apache.xerces.xni.NamespaceContext;
+import org.apache.xerces.xni.QName;
+import org.apache.xerces.xni.XMLAttributes;
+import org.apache.xerces.xni.XMLDocumentHandler;
+import org.apache.xerces.xni.XMLLocator;
+import org.apache.xerces.xni.XMLResourceIdentifier;
+import org.apache.xerces.xni.XMLString;
+import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLDocumentSource;
import org.apache.xerces.xni.parser.XMLInputSource;
import org.apache.xml.serialize.HTMLSerializer;
@@ -31,7 +41,12 @@
import org.cyberneko.html.HTMLEventInfo;
import org.cyberneko.html.HTMLScanner;
import org.cyberneko.html.HTMLTagBalancer;
-import org.w3c.dom.*;
+import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentFragment;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.Text;
import java.io.IOException;
import java.io.StringReader;
@@ -160,24 +175,43 @@
public void handleEvent(boolean shouldClose, Object content, Augmentations
augs) {
HTMLEventInfo info = getEventInfo(augs);
if (info.isSynthesized()) {
- // NOTE! Remove this to balance syntesized close tags
+ // NOTE! Remove this to balance synthesized close tags
if (!shouldClose) return;
// Must close with existing content
String unstructured = getUnstructuredString(startCharOffsets,
lastCharOffsets);
- elementStack.peek().appendChild(document.createTextNode(unstructured));
- startCharOffsets[0] = -1;
- lastCharOffsets[0] = -1;
if (content != null) {
-
elementStack.peek().appendChild(document.createTextNode(content.toString()));
+ unstructured += content.toString();
+ }
+ if (unstructured.length() > 0) {
+
elementStack.peek().appendChild(document.createTextNode(unstructured));
}
+ startCharOffsets[0] = -1;
+ lastCharOffsets[0] = -1;
} else {
if (shouldClose) {
String unstructured = getUnstructuredString(startCharOffsets,
lastCharOffsets);
-
elementStack.peek().appendChild(document.createTextNode(unstructured));
+ if (unstructured.length() > 0) {
+
elementStack.peek().appendChild(document.createTextNode(unstructured));
+ }
startCharOffsets[0] = -1;
lastCharOffsets[0] = -1;
} else if (startCharOffsets[0] == -1) {
recordStartEnd(info, startCharOffsets);
+ // HACK - Neko seems to be incorrectly reporting the start offset
for certain content
+ // See
https://sourceforge.net/tracker2/?func=detail&aid=2236681&group_id=195122&atid=952178
+ if (content != null) {
+ String contentString = content.toString();
+ if (startCharOffsets[1] - startCharOffsets[0] !=
contentString.length()) {
+ Node lastChild = elementStack.peek().getLastChild();
+ if (lastChild instanceof Text) {
+ ((Text)lastChild).appendData(contentString);
+ } else {
+
elementStack.peek().appendChild(document.createTextNode(contentString));
+ }
+ startCharOffsets[0] = -1;
+ lastCharOffsets[0] = -1;
+ }
+ }
lastCharOffsets[0] = -1;
} else {
recordStartEnd(info, lastCharOffsets);
@@ -199,7 +233,8 @@
}
public void startDocument(XMLLocator xmlLocator, String encoding,
- NamespaceContext namespaceContext, Augmentations
augs) throws XNIException {
+ NamespaceContext namespaceContext, Augmentations
augs)
+ throws XNIException {
document = documentFactory.createDocument(null, null, null);
elementStack.clear();
documentFragment = document.createDocumentFragment();
@@ -207,12 +242,14 @@
//trace("StartDoc", augs);
}
- public void xmlDecl(String version, String encoding, String standalone,
Augmentations augs) throws XNIException {
+ public void xmlDecl(String version, String encoding, String standalone,
Augmentations augs)
+ throws XNIException {
//trace("xmlDecl", augs);
handleEvent(false, null, augs);
}
- public void doctypeDecl(String rootElement, String publicId, String
systemId, Augmentations augs) throws XNIException {
+ public void doctypeDecl(String rootElement, String publicId, String
systemId,
+ Augmentations augs) throws XNIException {
// Recreate the document with the specific doctype
document = documentFactory.createDocument(null, null,
documentFactory.createDocumentType(rootElement, publicId, systemId));
@@ -229,12 +266,14 @@
//trackInfo(augs);
}
- public void processingInstruction(String s, XMLString xmlString,
Augmentations augs) throws XNIException {
+ public void processingInstruction(String s, XMLString xmlString,
Augmentations augs)
+ throws XNIException {
//trace("PI", augs);
handleEvent(false, xmlString, augs);
}
- public void startElement(QName qName, XMLAttributes xmlAttributes,
Augmentations augs) throws XNIException {
+ public void startElement(QName qName, XMLAttributes xmlAttributes,
Augmentations augs)
+ throws XNIException {
//trace("StartElem(" + qName.rawname + ")", augs);
if (elements.contains(qName.rawname.toLowerCase())) {
handleEvent(true, null, augs);
@@ -249,7 +288,8 @@
}
}
- public void emptyElement(QName qName, XMLAttributes xmlAttributes,
Augmentations augs) throws XNIException {
+ public void emptyElement(QName qName, XMLAttributes xmlAttributes,
Augmentations augs)
+ throws XNIException {
//trace("EmptyElemm(" + qName.rawname + ")", augs);
if (elements.contains(qName.rawname.toLowerCase())) {
handleEvent(true, null, augs);
@@ -264,7 +304,8 @@
}
- public void startGeneralEntity(String s, XMLResourceIdentifier
xmlResourceIdentifier, String s1, Augmentations augs) throws XNIException {
+ public void startGeneralEntity(String s, XMLResourceIdentifier
xmlResourceIdentifier, String s1,
+ Augmentations augs) throws XNIException {
//trace("StartEntity(" + s + ")", augs);
handleEvent(false, null, augs);
}