Author: johnh
Date: Wed Nov 18 00:14:45 2009
New Revision: 881629
URL: http://svn.apache.org/viewvc?rev=881629&view=rev
Log:
Fixes accommodating more Neko 1.9.13 behavior.
1. Reintroduces document normalization behavior, in particular ensuring that
<body> exists in the returned document.
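2. Pushes all <script> elements in the parsed HTML <head> element to the start of
<body>. (As implemented in this revision they land in the reverse of their <head>
order; see test-leadingscript-expected.html.) This ensures that gadget scripts referencing document.body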
work appropriately. This may have odd side-effects if a particular gadget
server installation oscillates between String- and DOM-based rewriters, since
each String-to-DOM conversion (in MutableContent) will see <head><script> moved
to <body>. Still, this change should be functionally intact.
I'm working on a GadgetHtmlParser subclass using Caja's
non-validating/rejiggering parser now, which better fits with Shindig's use
case. This code will hopefully tide us over until that time.
Added:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java?rev=881629&r1=881628&r2=881629&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
Wed Nov 18 00:14:45 2009
@@ -19,6 +19,7 @@
package org.apache.shindig.gadgets.parse.nekohtml;
import org.apache.commons.lang.StringUtils;
+import org.apache.shindig.common.xml.DomUtil;
import org.apache.shindig.gadgets.GadgetException;
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
import org.apache.xerces.xni.Augmentations;
@@ -48,6 +49,7 @@
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
import java.io.IOException;
import java.io.StringReader;
@@ -107,6 +109,7 @@
Document document = handler.getDocument();
DocumentFragment fragment = handler.getFragment();
normalizeFragment(document, fragment);
+ fixNekoWeirdness(document);
return document;
}
@@ -164,6 +167,51 @@
htmlScanner.scanDocument(true);
return handler;
}
+
+ private void fixNekoWeirdness(Document document) {
+ // Neko as of versions > 1.9.13 stuffs all leading <script> nodes into <head>.
+ // This breaks all sorts of assumptions in gadgets, notably the existence of document.body.
+ // We can't tell Neko to avoid putting <script> into <head> however, since gadgets
+ // like <Content><script>...</script><style>...</style> will break due to both
+ // <script> and <style> ending up in <body> -- at which point Neko unceremoniously
+ // drops the <style> (and <link>) elements.
+ // Therefore we just search for <script> elements in <head> and stuff them all into
+ // the top of <body>.
+ // This method assumes a normalized document as input.
+ Node html = DomUtil.getFirstNamedChildNode(document, "html");
+ if (html.getNextSibling() != null &&
+ html.getNextSibling().getNodeName().equalsIgnoreCase("html")) {
+ // if a doctype is specified, then the desired root <html> node is wrapped by an <HTML> node
+ // Pull out the <html> root.
+ html = html.getNextSibling();
+ }
+ Node head = DomUtil.getFirstNamedChildNode(html, "head");
+ if (head == null) {
+ head = document.createElement("head");
+ html.insertBefore(head, html.getFirstChild());
+ }
+ NodeList headNodes = head.getChildNodes();
+ Stack<Node> headScripts = new Stack<Node>();
+ for (int i = 0; i < headNodes.getLength(); ++i) {
+ Node headChild = headNodes.item(i);
+ if (headChild.getNodeName().equalsIgnoreCase("script")) {
+ headScripts.add(headChild);
+ }
+ }
+
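+ // Remove from head, add to top of <body>. Note: the Stack is popped last-in-first-out and each script is inserted before the same bodyFirst node, so the scripts end up in the reverse of their <head> order.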
+ Node body = DomUtil.getFirstNamedChildNode(html, "body");
+ if (body == null) {
+ body = document.createElement("body");
+ html.insertBefore(body, head.getNextSibling());
+ }
+ Node bodyFirst = body.getFirstChild();
+ while (headScripts.size() > 0) {
+ Node headScript = headScripts.pop();
+ head.removeChild(headScript);
+ body.insertBefore(headScript, bodyFirst);
+ }
+ }
protected HTMLConfiguration newConfiguration() {
HTMLConfiguration config = new HTMLConfiguration();
Modified:
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java?rev=881629&r1=881628&r2=881629&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java
Wed Nov 18 00:14:45 2009
@@ -68,5 +68,11 @@
String expected =
loadFile("org/apache/shindig/gadgets/parse/nekohtml/test-with-ampersands-expected.html");
parseAndCompareBalanced(content, expected, simple);
}
+
+ public void testScriptPushedToBody() throws Exception {
+ String content =
loadFile("org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html");
+ String expected =
loadFile("org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html");
+ parseAndCompareBalanced(content, expected, simple);
+ }
}
Modified:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html?rev=881629&r1=881628&r2=881629&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html
(original)
+++
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html
Wed Nov 18 00:14:45 2009
@@ -1,2 +1,2 @@
-<html><head><script>document.write("dont add to head or else")</script>
-<style type="text/css"> A { font : bold; }</style></head><body></body></html>
\ No newline at end of file
+<html><head>
+<style type="text/css"> A { font : bold;
}</style></head><body><script>document.write("dont add to head or
else")</script></body></html>
\ No newline at end of file
Modified:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html?rev=881629&r1=881628&r2=881629&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html
(original)
+++
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html
Wed Nov 18 00:14:45 2009
@@ -1,3 +1,3 @@
<html><head>
<!-- A head tag but no body tag is not good -->
-<script>document.write("dont add to head or else")</script><style
type="text/css"> A { font : bold; } </style></head><body></body></html>
\ No newline at end of file
+<style type="text/css"> A { font : bold; }
</style></head><body><script>document.write("dont add to head or
else")</script></body></html>
\ No newline at end of file
Added:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html?rev=881629&view=auto
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html
(added)
+++
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript-expected.html
Wed Nov 18 00:14:45 2009
@@ -0,0 +1,6 @@
+<html><head>
+<style>Some CSS here</style>
+
+<link rel="linkrel">
+
+</head><body><script>foo3();</script><script>foo2();</script><script>foo1();</script><div
id="mydiv">mycontent</div></body></html>
\ No newline at end of file
Added:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html?rev=881629&view=auto
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html
(added)
+++
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-leadingscript.html
Wed Nov 18 00:14:45 2009
@@ -0,0 +1,6 @@
+<script>foo1();</script>
+<style>Some CSS here</style>
+<script>foo2();</script>
+<link rel="linkrel"/>
+<script>foo3();</script>
+<div id="mydiv">mycontent</div>
\ No newline at end of file