Author: lryan
Date: Wed Nov 26 17:22:21 2008
New Revision: 721058
URL: http://svn.apache.org/viewvc?rev=721058&view=rev
Log:
New Neko based HTML serializer.
- Serializes simplified DOM 6x faster than previous serializer.
- Full DOM serializes 2x faster.
- Doctype serialization now exactly matches input
Parser fixes to better maintain entity references in parsed content and avoid
redundant escaping.
Added:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java
- copied, changed from r721042,
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/RenderingContentRewriterTest.java
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-expected.html
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java?rev=721058&r1=721057&r2=721058&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
Wed Nov 26 17:22:21 2008
@@ -23,9 +23,6 @@
import com.google.inject.Inject;
import com.google.inject.Singleton;
-
-import org.apache.xml.serialize.HTMLSerializer;
-import org.apache.xml.serialize.OutputFormat;
import org.cyberneko.html.parsers.DOMFragmentParser;
import org.cyberneko.html.parsers.DOMParser;
import org.w3c.dom.DOMImplementation;
@@ -36,15 +33,11 @@
import java.io.IOException;
import java.io.StringReader;
-import java.io.StringWriter;
/**
- * Parser that uses the NekoHtml parser.
+ * Parser that uses the NekoHtml parser and produces an un-abridged DOM
*
- * TODO:
- * Currently this code uses the ParsedXXX wrapper types so we can share
abstraction
- * with Caja. This is probably unnecessary overhead and we would prefer that
Caja
- * implements up to org.w3c.dom (or perhaps the Caja wrapper types should?)
+ * TODO: Create a reusable instance in ThreadLocal
*/
@Singleton
public class NekoHtmlParser extends GadgetHtmlParser {
@@ -60,7 +53,7 @@
public Document parseDomImpl(String source) throws GadgetException {
try {
Document document = parseFragment(source);
- HtmlSerializer.attach(document, new Serializer(), source);
+ HtmlSerializer.attach(document, new NekoSerializer(), source);
return document;
} catch (Exception e) {
throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, e);
@@ -71,40 +64,35 @@
InputSource input = new InputSource(new StringReader(source));
if (attemptFullDocParseFirst(source)) {
DOMParser parser = new DOMParser();
- // Force parser not to use HTMLDocumentImpl as document implementation
-
parser.setProperty("http://apache.org/xml/properties/dom/document-class-name",
null);
+ // Force parser not to use HTMLDocumentImpl as document implementation
otherwise
+ // it forces all element names to uppercase.
+
parser.setProperty("http://apache.org/xml/properties/dom/document-class-name",
+ "org.apache.xerces.dom.DocumentImpl");
+ // Dont convert element names to upper/lowercase
parser.setProperty("http://cyberneko.org/html/properties/names/elems",
"default");
+ // Preserve case of attributes
+ parser.setProperty("http://cyberneko.org/html/properties/names/attrs",
"no-change");
+ // Record entity references
+
parser.setFeature("http://apache.org/xml/features/scanner/notify-char-refs",
true);
+
parser.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs",
true);
+ // No need to defer as full DOM is walked later
+
parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion",
false);
parser.parse(input);
return parser.getDocument();
} else {
Document htmlDoc = documentProvider.createDocument(null, null, null);
+ // Workaround for error check failure adding text node to entity ref as
a child
+ htmlDoc.setStrictErrorChecking(false);
DOMFragmentParser parser = new DOMFragmentParser();
parser.setProperty("http://cyberneko.org/html/properties/names/elems",
"default");
parser.setFeature("http://cyberneko.org/html/features/document-fragment", true);
+ parser.setProperty("http://cyberneko.org/html/properties/names/attrs",
"no-change");
+
parser.setFeature("http://apache.org/xml/features/scanner/notify-char-refs",
true);
+
parser.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs",
true);
DocumentFragment fragment = htmlDoc.createDocumentFragment();
parser.parse(input, fragment);
normalizeFragment(htmlDoc, fragment);
return htmlDoc;
}
}
-
- static class Serializer extends HtmlSerializer {
-
- public String serializeImpl(Document doc) {
- OutputFormat outputFormat = new OutputFormat();
- outputFormat.setPreserveSpace(true);
- outputFormat.setPreserveEmptyAttributes(false);
- if (doc.getDoctype() == null) {
- outputFormat.setOmitDocumentType(true);
- }
- StringWriter sw = createWriter(doc);
- HTMLSerializer serializer = new HTMLSerializer(sw, outputFormat);
- try {
- serializer.serialize(doc);
- return sw.toString();
- } catch (IOException ioe) {
- return null;
- }
- }
- }
}
Added:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java?rev=721058&view=auto
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
(added)
+++
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
Wed Nov 26 17:22:21 2008
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.shindig.gadgets.parse.nekohtml;
+
+import org.apache.shindig.gadgets.parse.HtmlSerializer;
+
+import org.cyberneko.html.HTMLElements;
+import org.cyberneko.html.HTMLEntities;
+import org.w3c.dom.*;
+
+import java.io.IOException;
+import java.io.StringWriter;
+
+/**
+ * This parser does not try to escape entities in text content as it expects
the parser
+ * to have retained the original entity references rather than its resolved
form in text nodes
+ */
+public class NekoSerializer extends HtmlSerializer
+{
+ public NekoSerializer() {
+ }
+
+ public String serializeImpl(Document doc) {
+ try {
+ StringWriter sw = createWriter(doc);
+ if (doc.getDoctype() != null) {
+ outputDocType(doc.getDoctype(), sw);
+ }
+ serialize(doc, sw);
+ String s = sw.toString();
+ return s;
+ } catch (IOException ioe) {
+ return null;
+ }
+ }
+
+ public static void serialize(Node n, Appendable output) throws IOException {
+ switch (n.getNodeType()) {
+ case Node.CDATA_SECTION_NODE: {
+ break;
+ }
+ case Node.COMMENT_NODE: {
+ output.append("<!--").append(n.getNodeValue()).append("-->");
+ break;
+ }
+ case Node.DOCUMENT_NODE: {
+ serialize(((Document)n).getDocumentElement(), output);
+ break;
+ }
+ case Node.ELEMENT_NODE: {
+ Element elem = (Element)n;
+ HTMLElements.Element htmlElement =
+ HTMLElements.getElement(elem.getNodeName());
+ NodeList children = elem.getChildNodes();
+ printStartElement(elem, output);
+ for (int i = 0; i < children.getLength(); i++) {
+ serialize(children.item(i), output);
+ }
+ if (!htmlElement.isEmpty()) {
+ output.append("</").append(elem.getNodeName()).append('>');
+ }
+ break;
+ }
+ case Node.ENTITY_REFERENCE_NODE: {
+ output.append("&").append(n.getNodeName()).append(";");
+ break;
+ }
+ case Node.TEXT_NODE: {
+ output.append(n.getTextContent());
+ break;
+ }
+ }
+ }
+
+ public static void outputDocType(DocumentType docType, Appendable output)
throws IOException {
+ output.append("<!DOCTYPE ");
+ // Use this so name matches case for XHTML
+
output.append(docType.getOwnerDocument().getDocumentElement().getNodeName());
+ if (docType.getPublicId() != null && docType.getPublicId().length() > 0) {
+ output.append(" ");
+ output.append("PUBLIC
").append('"').append(docType.getPublicId()).append('"');
+ }
+ if (docType.getSystemId() != null && docType.getSystemId().length() > 0) {
+ output.append(" ");
+ output.append('"').append(docType.getSystemId()).append('"');
+ }
+ output.append(">\n");
+ }
+
+ public static void printStartElement(Element elem, Appendable output) throws
IOException {
+ output.append("<").append(elem.getTagName());
+ NamedNodeMap attributes = elem.getAttributes();
+ for (int i = 0; i < attributes.getLength(); i++) {
+ Attr attr = (Attr)attributes.item(i);
+ output.append(' ').append(attr.getNodeName());
+ if (attr.getNodeValue() != null &&
+ attr.getNodeValue().length() > 0) {
+ output.append("=\"");
+ printAttributeValue(attr.getNodeValue(), output);
+ output.append('"');
+ }
+ }
+ output.append(">");
+ }
+
+ public static void printAttributeValue(String text, Appendable output)
throws IOException {
+ int length = text.length();
+ for (int j = 0; j < length; j++) {
+ char c = text.charAt(j);
+ if (c == '"') {
+ output.append("&quot;");
+ } else {
+ output.append(c);
+ }
+ }
+ }
+
+ public static void printEscapedText(CharSequence text, Appendable output)
throws IOException {
+ for (int i = 0; i < text.length(); i++) {
+ char c = text.charAt(i);
+ String entity = HTMLEntities.get(c);
+ if (entity != null) {
+ output.append('&').append(entity).append(";");
+ } else {
+ output.append(c);
+ }
+ }
+ }
+}
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java?rev=721058&r1=721057&r2=721058&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
Wed Nov 26 17:22:21 2008
@@ -19,38 +19,21 @@
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
import org.apache.shindig.gadgets.parse.HtmlSerializer;
+import org.apache.xerces.xni.*;
+import org.apache.xerces.xni.parser.XMLDocumentSource;
+import org.apache.xerces.xni.parser.XMLInputSource;
import com.google.common.collect.ImmutableSet;
import com.google.inject.Inject;
import com.google.inject.Singleton;
-
-import org.apache.xerces.xni.Augmentations;
-import org.apache.xerces.xni.NamespaceContext;
-import org.apache.xerces.xni.QName;
-import org.apache.xerces.xni.XMLAttributes;
-import org.apache.xerces.xni.XMLDocumentHandler;
-import org.apache.xerces.xni.XMLLocator;
-import org.apache.xerces.xni.XMLResourceIdentifier;
-import org.apache.xerces.xni.XMLString;
-import org.apache.xerces.xni.XNIException;
-import org.apache.xerces.xni.parser.XMLDocumentSource;
-import org.apache.xerces.xni.parser.XMLInputSource;
-import org.apache.xml.serialize.HTMLSerializer;
-import org.apache.xml.serialize.OutputFormat;
import org.cyberneko.html.HTMLConfiguration;
-import org.cyberneko.html.HTMLElements;
import org.cyberneko.html.HTMLEntities;
import org.cyberneko.html.HTMLScanner;
import org.cyberneko.html.HTMLTagBalancer;
-import org.w3c.dom.DOMImplementation;
-import org.w3c.dom.Document;
-import org.w3c.dom.DocumentFragment;
-import org.w3c.dom.Element;
-import org.w3c.dom.Node;
+import org.w3c.dom.*;
import java.io.IOException;
import java.io.StringReader;
-import java.io.StringWriter;
import java.util.Set;
import java.util.Stack;
@@ -58,6 +41,8 @@
* Neko based DOM parser that concatentates elements which we dont care about
into
* text nodes to keep DOM model simplified. Much of this code is based on
* org.cyberneko.html.filters.Writer
+ *
+ * TODO: Create a reusable instance in ThreadLocal
*/
@Singleton
public class NekoSimplifiedHtmlParser extends GadgetHtmlParser {
@@ -80,8 +65,14 @@
htmlScanner.setDocumentHandler(tagBalancer);
HTMLConfiguration config = new HTMLConfiguration();
+ // Maintain original case for elements and attributes
config.setProperty("http://cyberneko.org/html/properties/names/elems",
"match");
+ config.setProperty("http://cyberneko.org/html/properties/names/attrs",
"no-change");
+ // Parse as fragment.
config.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment",
true);
+ // Get notified of entity and character references
+
config.setFeature("http://apache.org/xml/features/scanner/notify-char-refs",
true);
+
config.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs",
true);
tagBalancer.reset(config);
htmlScanner.reset(config);
XMLInputSource inputSource = new XMLInputSource(null, null, null);
@@ -93,7 +84,7 @@
Document document = handler.getDocument();
DocumentFragment fragment = handler.getFragment();
normalizeFragment(document, fragment);
- HtmlSerializer.attach(document, new Serializer(), source);
+ HtmlSerializer.attach(document, new NekoSerializer(), source);
return document;
} catch (IOException ioe) {
return null;
@@ -106,6 +97,7 @@
private class DocumentHandler implements XMLDocumentHandler {
private final Stack<Node> elementStack = new Stack<Node>();
private final StringBuilder builder;
+ private boolean inEntity = false;
private DocumentFragment documentFragment;
@@ -242,6 +234,7 @@
}
}
printEntity(name);
+ inEntity = true;
}
private void printEntity(String name) {
@@ -255,27 +248,14 @@
}
public void endGeneralEntity(String s, Augmentations augs) throws
XNIException {
- // No-op
+ inEntity = false;
}
public void characters(XMLString text, Augmentations augs) throws
XNIException {
- if
(HTMLElements.getElement(elementStack.peek().getNodeName()).isSpecial()) {
- builder.append(text.ch, text.offset, text.length);
- } else {
- for (int i = 0; i < text.length; i++) {
- char c = text.ch[text.offset + i];
- if (c != '\n') {
- String entity = HTMLEntities.get(c);
- if (entity != null) {
- printEntity(entity);
- } else {
- builder.append(c);
- }
- } else {
- builder.append("\n");
- }
- }
+ if (inEntity) {
+ return;
}
+ builder.append(text.ch, text.offset, text.length);
}
public void ignorableWhitespace(XMLString text, Augmentations augs) throws
XNIException {
@@ -317,34 +297,4 @@
return null;
}
}
-
- static class Serializer extends HtmlSerializer {
-
- @Override
- public String serializeImpl(Document doc) {
- OutputFormat outputFormat = new OutputFormat();
- outputFormat.setPreserveSpace(true);
- outputFormat.setPreserveEmptyAttributes(false);
- if (doc.getDoctype() == null) {
- outputFormat.setOmitDocumentType(true);
- }
- StringWriter sw = createWriter(doc);
- HTMLSerializer serializer = new HTMLSerializer(sw, outputFormat) {
- // Overridden to prevent escaping of literal text
- @Override
- protected void characters(String s) throws IOException {
- this.content();
- this._printer.printText(s);
- }
- };
-
- try {
- serializer.serialize(doc);
- return sw.toString();
- } catch (IOException ioe) {
- return null;
- }
- }
- }
-
}
Modified:
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java?rev=721058&r1=721057&r2=721058&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java
Wed Nov 26 17:22:21 2008
@@ -20,11 +20,10 @@
import org.apache.shindig.gadgets.parse.nekohtml.NekoHtmlParser;
import org.apache.shindig.gadgets.rewrite.XPathWrapper;
+import junit.framework.TestCase;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
-import junit.framework.TestCase;
-
/**
* Note these tests are of marginal use. Consider removing. More useful tests
would exercise
* the capability of the parser to handle strange HTML.
@@ -65,19 +64,9 @@
assertEquals("foo", wrapper.getValue("/html/body/div/@id"));
}
- public void testParseStringUnescapesProperly() throws Exception {
- parseStringUnescapesProperly(nekoParser);
- }
-
- void parseStringUnescapesProperly(GadgetHtmlParser htmlParser) throws
Exception {
- Document doc =
htmlParser.parseDom("<content&'chrome'>");
- XPathWrapper wrapper = new XPathWrapper(doc);
- assertEquals("<content&'chrome'>", wrapper.getValue("/html/body"));
- }
-
public void testParseNestedContentWithNoCloseForBrAndHr() throws Exception {
parseNestedContentWithNoCloseForBrAndHr(nekoParser);
- }
+ }
void parseNestedContentWithNoCloseForBrAndHr(GadgetHtmlParser htmlParser)
throws Exception {
Document doc = htmlParser.parseDom("<div>x and y<br> and <hr>z</div>");
Modified:
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java?rev=721058&r1=721057&r2=721058&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java
Wed Nov 26 17:22:21 2008
@@ -69,14 +69,14 @@
this.numRuns = 10;
warmup = true;
//runCaja();
- //runNeko();
+ runNeko();
runNekoSimple();
//Sleep to let JIT kick in
Thread.sleep(10000L);
- this.numRuns = 50; //numRuns;
+ this.numRuns = numRuns;
warmup = false;
//runCaja();
- //runNeko();
+ runNeko();
runNekoSimple();
}
Copied:
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java
(from r721042,
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java)
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java?p2=incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java&p1=incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java&r1=721042&r2=721058&rev=721058&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java
Wed Nov 26 17:22:21 2008
@@ -17,19 +17,18 @@
*/
package org.apache.shindig.gadgets.parse.nekohtml;
+import org.apache.commons.io.IOUtils;
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
import org.apache.shindig.gadgets.parse.HtmlSerializer;
import org.apache.shindig.gadgets.parse.ParseModule;
-import org.apache.commons.io.IOUtils;
-import org.w3c.dom.Document;
-
import junit.framework.TestCase;
+import org.w3c.dom.Document;
/**
- * Test behavior of simplified HTML parser
+ * Test behavior of neko based parser and serializers
*/
-public class NekoParsersTest extends TestCase {
+public class NekoParserAndSerializeTest extends TestCase {
private NekoSimplifiedHtmlParser simple = new NekoSimplifiedHtmlParser(
new ParseModule.DOMImplementationProvider().get());
Modified:
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/RenderingContentRewriterTest.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/RenderingContentRewriterTest.java?rev=721058&r1=721057&r2=721058&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/RenderingContentRewriterTest.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/RenderingContentRewriterTest.java
Wed Nov 26 17:22:21 2008
@@ -18,32 +18,18 @@
*/
package org.apache.shindig.gadgets.render;
-import static
org.apache.shindig.gadgets.render.RenderingContentRewriter.DEFAULT_CSS;
-import static
org.apache.shindig.gadgets.render.RenderingContentRewriter.FEATURES_KEY;
-import static
org.apache.shindig.gadgets.render.RenderingContentRewriter.INSERT_BASE_ELEMENT_KEY;
-import static org.easymock.EasyMock.expect;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
import org.apache.shindig.common.ContainerConfig;
import org.apache.shindig.common.PropertiesModule;
import org.apache.shindig.common.uri.Uri;
import org.apache.shindig.common.xml.XmlUtil;
-import org.apache.shindig.gadgets.Gadget;
-import org.apache.shindig.gadgets.GadgetContext;
-import org.apache.shindig.gadgets.GadgetException;
-import org.apache.shindig.gadgets.GadgetFeature;
-import org.apache.shindig.gadgets.GadgetFeatureRegistry;
-import org.apache.shindig.gadgets.JsLibrary;
-import org.apache.shindig.gadgets.MessageBundleFactory;
-import org.apache.shindig.gadgets.UrlGenerator;
+import org.apache.shindig.gadgets.*;
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
import org.apache.shindig.gadgets.parse.ParseModule;
import org.apache.shindig.gadgets.preload.NullPreloads;
import org.apache.shindig.gadgets.preload.PreloadException;
import org.apache.shindig.gadgets.preload.PreloadedData;
import org.apache.shindig.gadgets.preload.Preloads;
+import static org.apache.shindig.gadgets.render.RenderingContentRewriter.*;
import org.apache.shindig.gadgets.rewrite.MutableContent;
import org.apache.shindig.gadgets.spec.GadgetSpec;
import org.apache.shindig.gadgets.spec.LocaleSpec;
@@ -56,21 +42,16 @@
import com.google.common.collect.Sets;
import com.google.inject.Guice;
import com.google.inject.Injector;
-
+import static org.easymock.EasyMock.expect;
import org.easymock.classextension.EasyMock;
import org.easymock.classextension.IMocksControl;
import org.json.JSONException;
import org.json.JSONObject;
+import static org.junit.Assert.*;
import org.junit.Before;
import org.junit.Test;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -150,7 +131,7 @@
@Test
public void completeDocument() throws Exception {
- String docType = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01
Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">";
+ String docType = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01
Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">";
String head = "<script src=\"foo.js\"></script><style
type=\"text/css\">body{color:red;}</style>";
String bodyAttr = " onload=\"foo();\"";
String body = "hello, world.";
Modified:
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-expected.html
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-expected.html?rev=721058&r1=721057&r2=721058&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-expected.html
(original)
+++
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-expected.html
Wed Nov 26 17:22:21 2008
@@ -1,4 +1,4 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/loose.dtd">
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
<head id="head">
<link href="http://www.example.org/css.css" rel="stylesheet" type="text/css">
@@ -17,9 +17,9 @@
<input type="hidden" value="something">
<input type="text">
</div>
- <div><-- An unbalanced tag we dont care about -->
+ <div><-- An unbalanced tag we dont care about -->
<p>Some entities &#x27;"</p>
- <p>Not a real entity &fake;</p>
+ <p>Not a real entity &fake;</p>
</div></form>
</body></html>
\ No newline at end of file