Author: lindner
Date: Sun May 10 06:11:59 2009
New Revision: 773308
URL: http://svn.apache.org/viewvc?rev=773308&view=rev
Log:
SHINDIG-987 | Applied patch from Vincent with suggested modifications from Adam
| NekoParser returns cryptic error messages when parsing bad html
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParserTest.java
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java?rev=773308&r1=773307&r2=773308&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java Sun May 10 06:11:59 2009
@@ -17,6 +17,7 @@
*/
package org.apache.shindig.gadgets.parse.nekohtml;
+import org.apache.commons.lang.StringUtils;
import org.apache.shindig.gadgets.GadgetException;
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
import org.apache.shindig.gadgets.parse.HtmlSerializer;
@@ -37,6 +38,7 @@
import org.cyberneko.html.HTMLScanner;
import org.cyberneko.html.HTMLTagBalancer;
import org.cyberneko.html.filters.NamespaceBinder;
+import org.w3c.dom.DOMException;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
@@ -292,7 +294,34 @@
element.setAttributeNS(xmlAttributes.getURI(i),
xmlAttributes.getQName(i),
xmlAttributes.getValue(i));
} else {
- element.setAttribute(xmlAttributes.getLocalName(i) , xmlAttributes.getValue(i));
+ try {
+ element.setAttribute(xmlAttributes.getLocalName(i), xmlAttributes
+ .getValue(i));
+ } catch (DOMException e) {
+ switch (e.code) {
+ case DOMException.INVALID_CHARACTER_ERR:
+ StringBuilder sb = new StringBuilder(e.getMessage());
+ sb.append("Around ...<");
+ if (qName.prefix != null) {
+ sb.append(qName.prefix);
+ sb.append(":");
+ }
+ sb.append(qName.localpart);
+ for (int j = 0; j < xmlAttributes.getLength(); j++) {
+ if (StringUtils.isNotBlank(xmlAttributes.getLocalName(j))
+ && StringUtils.isNotBlank(xmlAttributes.getValue(j))) {
+ sb.append(' ');
+ sb.append(xmlAttributes.getLocalName(j));
+ sb.append("=\"");
+ sb.append(xmlAttributes.getValue(j)).append('\"');
+ }
+ }
+ sb.append("...");
+ throw new DOMException(DOMException.INVALID_CHARACTER_ERR, sb.toString());
+ default:
+ throw e;
+ }
+ }
}
}
appendChild(element);
Modified:
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParserTest.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParserTest.java?rev=773308&r1=773307&r2=773308&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParserTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParserTest.java Sun May 10 06:11:59 2009
@@ -28,6 +28,7 @@
import org.apache.shindig.gadgets.spec.PipelinedData;
import org.junit.Before;
import org.junit.Test;
+import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
@@ -115,6 +116,19 @@
assertEquals("Some content", spanElements.item(0).getTextContent());
}
+ @Test
+ public void testInvalid() throws Exception {
+ String content = "<html><div id=\"div_super\" class=\"div_super\" valign:\"middle\"></div></html>";
+ try {
+ parser.parseDom(content);
+ assertTrue("No exception caught", false);
+ } catch (DOMException e) {
+ assertTrue(e.getMessage().contains("INVALID_CHARACTER_ERR"));
+ assertTrue(e.getMessage().contains(
+ "Around ...<div id=\"div_super\" class=\"div_super\"..."));
+ }
+ }
+
private List<Element> getScripts(final String type) {
NodeIterator nodeIterator = ((DocumentTraversal) document)
.createNodeIterator(document, NodeFilter.SHOW_ELEMENT,