Author: lryan
Date: Wed Nov 26 17:22:21 2008
New Revision: 721058

URL: http://svn.apache.org/viewvc?rev=721058&view=rev
Log:
New Neko based HTML serializer.
 - Serializes simplified DOM 6x faster than previous serializer. 
 - Full DOM serializes 2x faster.
 - Doctype serialization now exactly matches input 

Parser fixes to better maintain entity references in parsed content and avoid 
redundant escaping.

Added:
    
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
    
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java
      - copied, changed from r721042, 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java
Modified:
    
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
    
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
    
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java
    
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java
    
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/RenderingContentRewriterTest.java
    
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-expected.html

Modified: 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java?rev=721058&r1=721057&r2=721058&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
 Wed Nov 26 17:22:21 2008
@@ -23,9 +23,6 @@
 
 import com.google.inject.Inject;
 import com.google.inject.Singleton;
-
-import org.apache.xml.serialize.HTMLSerializer;
-import org.apache.xml.serialize.OutputFormat;
 import org.cyberneko.html.parsers.DOMFragmentParser;
 import org.cyberneko.html.parsers.DOMParser;
 import org.w3c.dom.DOMImplementation;
@@ -36,15 +33,11 @@
 
 import java.io.IOException;
 import java.io.StringReader;
-import java.io.StringWriter;
 
 /**
- * Parser that uses the NekoHtml parser.
+ * Parser that uses the NekoHtml parser and produces an un-abridged DOM
  *
- * TODO:
- * Currently this code uses the ParsedXXX wrapper types so we can share 
abstraction
- * with Caja. This is probably unnecessary overhead and we would prefer that 
Caja
- * implements up to org.w3c.dom (or perhaps the Caja wrapper types should?)
+ * TODO: Create a reusable instance in ThreadLocal
  */
 @Singleton
 public class NekoHtmlParser extends GadgetHtmlParser {
@@ -60,7 +53,7 @@
   public Document parseDomImpl(String source) throws GadgetException {
     try {
       Document document = parseFragment(source);
-      HtmlSerializer.attach(document, new Serializer(), source);
+      HtmlSerializer.attach(document, new NekoSerializer(), source);
       return document;
     } catch (Exception e) {
       throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, e);
@@ -71,40 +64,35 @@
     InputSource input = new InputSource(new StringReader(source));
     if (attemptFullDocParseFirst(source)) {
       DOMParser parser = new DOMParser();
-      // Force parser not to use HTMLDocumentImpl as document implementation
-      
parser.setProperty("http://apache.org/xml/properties/dom/document-class-name";, 
null);
+      // Force parser not to use HTMLDocumentImpl as document implementation 
otherwise
+      // it forces all element names to uppercase.
+      
parser.setProperty("http://apache.org/xml/properties/dom/document-class-name";,
+          "org.apache.xerces.dom.DocumentImpl");
+      // Dont convert element names to upper/lowercase
       parser.setProperty("http://cyberneko.org/html/properties/names/elems";, 
"default");
+      // Preserve case of attributes
+      parser.setProperty("http://cyberneko.org/html/properties/names/attrs";, 
"no-change");
+      // Record entity references
+      
parser.setFeature("http://apache.org/xml/features/scanner/notify-char-refs";, 
true);
+      
parser.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs";,
 true);
+      // No need to defer as full DOM is walked later
+      
parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion";, 
false);
       parser.parse(input);
       return parser.getDocument();
     } else {
       Document htmlDoc = documentProvider.createDocument(null, null, null);
+      // Workaround for error check failure adding text node to entity ref as 
a child
+      htmlDoc.setStrictErrorChecking(false);
       DOMFragmentParser parser = new DOMFragmentParser();
       parser.setProperty("http://cyberneko.org/html/properties/names/elems";, 
"default");
       
parser.setFeature("http://cyberneko.org/html/features/document-fragment";, true);
+      parser.setProperty("http://cyberneko.org/html/properties/names/attrs";, 
"no-change");
+      
parser.setFeature("http://apache.org/xml/features/scanner/notify-char-refs";, 
true);
+      
parser.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs";,
 true);
       DocumentFragment fragment = htmlDoc.createDocumentFragment();
       parser.parse(input, fragment);
       normalizeFragment(htmlDoc, fragment);
       return htmlDoc;
     }
   }
-
-  static class Serializer extends HtmlSerializer {
-
-    public String serializeImpl(Document doc) {
-      OutputFormat outputFormat = new OutputFormat();
-      outputFormat.setPreserveSpace(true);
-      outputFormat.setPreserveEmptyAttributes(false);
-      if (doc.getDoctype() == null) {
-        outputFormat.setOmitDocumentType(true);
-      }
-      StringWriter sw = createWriter(doc);
-      HTMLSerializer serializer = new HTMLSerializer(sw, outputFormat);
-      try {
-        serializer.serialize(doc);
-        return sw.toString();
-      } catch (IOException ioe) {
-        return null;
-      }
-    }
-  }
 }

Added: 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java?rev=721058&view=auto
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
 (added)
+++ 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
 Wed Nov 26 17:22:21 2008
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.shindig.gadgets.parse.nekohtml;
+
+import org.apache.shindig.gadgets.parse.HtmlSerializer;
+
+import org.cyberneko.html.HTMLElements;
+import org.cyberneko.html.HTMLEntities;
+import org.w3c.dom.*;
+
+import java.io.IOException;
+import java.io.StringWriter;
+
+/**
+ * This parser does not try to escape entities in text content as it expects 
the parser
+ * to have retained the original entity references rather than its resolved 
form in text nodes
+ */
+public class NekoSerializer extends HtmlSerializer
+{
+  public NekoSerializer() {
+  }
+
+  public String serializeImpl(Document doc) {
+    try {
+      StringWriter sw = createWriter(doc);
+      if (doc.getDoctype() != null) {
+        outputDocType(doc.getDoctype(), sw);
+      }
+      serialize(doc, sw);
+      String s = sw.toString();
+      return s;
+    } catch (IOException ioe) {
+      return null;
+    }
+  }
+  
+  public static void serialize(Node n, Appendable output) throws IOException {
+    switch (n.getNodeType()) {
+      case Node.CDATA_SECTION_NODE: {
+        break;
+      }
+      case Node.COMMENT_NODE: {
+        output.append("<!--").append(n.getNodeValue()).append("-->");
+        break;
+      }
+      case Node.DOCUMENT_NODE: {
+        serialize(((Document)n).getDocumentElement(), output);
+        break;
+      }
+      case Node.ELEMENT_NODE: {
+        Element elem = (Element)n;
+        HTMLElements.Element htmlElement =
+            HTMLElements.getElement(elem.getNodeName());
+        NodeList children = elem.getChildNodes();
+        printStartElement(elem, output);
+        for (int i = 0; i < children.getLength(); i++) {
+          serialize(children.item(i), output);
+        }
+        if (!htmlElement.isEmpty()) {
+          output.append("</").append(elem.getNodeName()).append('>');
+        }
+        break;
+      }
+      case Node.ENTITY_REFERENCE_NODE: {
+        output.append("&").append(n.getNodeName()).append(";");
+        break;
+      }
+      case Node.TEXT_NODE: {
+        output.append(n.getTextContent());
+        break;
+      }
+    }
+  }
+
+  public static void outputDocType(DocumentType docType, Appendable output) 
throws IOException {
+    output.append("<!DOCTYPE ");
+    // Use this so name matches case for XHTML
+    
output.append(docType.getOwnerDocument().getDocumentElement().getNodeName());
+    if (docType.getPublicId() != null && docType.getPublicId().length() > 0) {
+      output.append(" ");
+      output.append("PUBLIC 
").append('"').append(docType.getPublicId()).append('"');
+    }
+    if (docType.getSystemId() != null && docType.getSystemId().length() > 0) {
+      output.append(" ");
+      output.append('"').append(docType.getSystemId()).append('"');
+    }
+    output.append(">\n");
+  }
+
+  public static void printStartElement(Element elem, Appendable output) throws 
IOException {
+    output.append("<").append(elem.getTagName());
+    NamedNodeMap attributes = elem.getAttributes();
+    for (int i = 0; i < attributes.getLength(); i++) {
+      Attr attr = (Attr)attributes.item(i);
+      output.append(' ').append(attr.getNodeName());
+      if (attr.getNodeValue() != null &&
+          attr.getNodeValue().length() > 0) {
+        output.append("=\"");
+        printAttributeValue(attr.getNodeValue(), output);
+        output.append('"');
+      }
+    }
+    output.append(">");
+  }
+
+  public static void printAttributeValue(String text, Appendable output) 
throws IOException {
+    int length = text.length();
+    for (int j = 0; j < length; j++) {
+      char c = text.charAt(j);
+      if (c == '"') {
+        output.append("&quot;");
+      } else {
+        output.append(c);
+      }
+    }
+  }
+
+  public static void printEscapedText(CharSequence text, Appendable output) 
throws IOException {
+    for (int i = 0; i < text.length(); i++) {
+      char c = text.charAt(i);
+      String entity = HTMLEntities.get(c);
+      if (entity != null) {
+        output.append('&').append(entity).append(";");
+      } else {
+        output.append(c);
+      }
+    }
+  }
+}

Modified: 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java?rev=721058&r1=721057&r2=721058&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
 Wed Nov 26 17:22:21 2008
@@ -19,38 +19,21 @@
 
 import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
 import org.apache.shindig.gadgets.parse.HtmlSerializer;
+import org.apache.xerces.xni.*;
+import org.apache.xerces.xni.parser.XMLDocumentSource;
+import org.apache.xerces.xni.parser.XMLInputSource;
 
 import com.google.common.collect.ImmutableSet;
 import com.google.inject.Inject;
 import com.google.inject.Singleton;
-
-import org.apache.xerces.xni.Augmentations;
-import org.apache.xerces.xni.NamespaceContext;
-import org.apache.xerces.xni.QName;
-import org.apache.xerces.xni.XMLAttributes;
-import org.apache.xerces.xni.XMLDocumentHandler;
-import org.apache.xerces.xni.XMLLocator;
-import org.apache.xerces.xni.XMLResourceIdentifier;
-import org.apache.xerces.xni.XMLString;
-import org.apache.xerces.xni.XNIException;
-import org.apache.xerces.xni.parser.XMLDocumentSource;
-import org.apache.xerces.xni.parser.XMLInputSource;
-import org.apache.xml.serialize.HTMLSerializer;
-import org.apache.xml.serialize.OutputFormat;
 import org.cyberneko.html.HTMLConfiguration;
-import org.cyberneko.html.HTMLElements;
 import org.cyberneko.html.HTMLEntities;
 import org.cyberneko.html.HTMLScanner;
 import org.cyberneko.html.HTMLTagBalancer;
-import org.w3c.dom.DOMImplementation;
-import org.w3c.dom.Document;
-import org.w3c.dom.DocumentFragment;
-import org.w3c.dom.Element;
-import org.w3c.dom.Node;
+import org.w3c.dom.*;
 
 import java.io.IOException;
 import java.io.StringReader;
-import java.io.StringWriter;
 import java.util.Set;
 import java.util.Stack;
 
@@ -58,6 +41,8 @@
  * Neko based DOM parser that concatentates elements which we dont care about 
into
  * text nodes to keep DOM model simplified. Much of this code is based on
  * org.cyberneko.html.filters.Writer
+ *
+ * TODO: Create a reusable instance in ThreadLocal
  */
 @Singleton
 public class NekoSimplifiedHtmlParser extends GadgetHtmlParser {
@@ -80,8 +65,14 @@
     htmlScanner.setDocumentHandler(tagBalancer);
 
     HTMLConfiguration config = new HTMLConfiguration();
+    // Maintain original case for elements and attributes
     config.setProperty("http://cyberneko.org/html/properties/names/elems";, 
"match");
+    config.setProperty("http://cyberneko.org/html/properties/names/attrs";, 
"no-change");
+    // Parse as fragment.
     
config.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment";,
 true);
+    // Get notified of entity and character references
+    
config.setFeature("http://apache.org/xml/features/scanner/notify-char-refs";, 
true);
+    
config.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs";,
 true);
     tagBalancer.reset(config);
     htmlScanner.reset(config);
     XMLInputSource inputSource = new XMLInputSource(null, null, null);
@@ -93,7 +84,7 @@
       Document document = handler.getDocument();
       DocumentFragment fragment = handler.getFragment();
       normalizeFragment(document, fragment);
-      HtmlSerializer.attach(document, new Serializer(), source);
+      HtmlSerializer.attach(document, new NekoSerializer(), source);
       return document;
     } catch (IOException ioe) {
       return null;
@@ -106,6 +97,7 @@
   private class DocumentHandler implements XMLDocumentHandler {
     private final Stack<Node> elementStack = new Stack<Node>();
     private final StringBuilder builder;
+    private boolean inEntity = false;
 
 
     private DocumentFragment documentFragment;
@@ -242,6 +234,7 @@
         }
       }
       printEntity(name);
+      inEntity = true;
     }
 
     private void printEntity(String name) {
@@ -255,27 +248,14 @@
     }
 
     public void endGeneralEntity(String s, Augmentations augs) throws 
XNIException {
-      // No-op
+      inEntity = false;
     }
 
     public void characters(XMLString text, Augmentations augs) throws 
XNIException {
-      if 
(HTMLElements.getElement(elementStack.peek().getNodeName()).isSpecial()) {
-        builder.append(text.ch, text.offset, text.length);
-      } else {
-        for (int i = 0; i < text.length; i++) {
-          char c = text.ch[text.offset + i];
-          if (c != '\n') {
-            String entity = HTMLEntities.get(c);
-            if (entity != null) {
-              printEntity(entity);
-            } else {
-              builder.append(c);
-            }
-          } else {
-            builder.append("\n");
-          }
-        }
+      if (inEntity) {
+        return;
       }
+      builder.append(text.ch, text.offset, text.length);
     }
 
     public void ignorableWhitespace(XMLString text, Augmentations augs) throws 
XNIException {
@@ -317,34 +297,4 @@
       return null;
     }
   }
-
-  static class Serializer extends HtmlSerializer {
-
-    @Override
-    public String serializeImpl(Document doc) {
-      OutputFormat outputFormat = new OutputFormat();
-      outputFormat.setPreserveSpace(true);
-      outputFormat.setPreserveEmptyAttributes(false);
-      if (doc.getDoctype() == null) {
-        outputFormat.setOmitDocumentType(true);
-      }
-      StringWriter sw = createWriter(doc);
-      HTMLSerializer serializer = new HTMLSerializer(sw, outputFormat) {
-        // Overridden to prevent escaping of literal text
-        @Override
-        protected void characters(String s) throws IOException {
-          this.content();
-          this._printer.printText(s);
-        }
-      };
-
-      try {
-        serializer.serialize(doc);
-        return sw.toString();
-      } catch (IOException ioe) {
-        return null;
-      }
-    }
-  }
-
 }

Modified: 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java?rev=721058&r1=721057&r2=721058&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java
 Wed Nov 26 17:22:21 2008
@@ -20,11 +20,10 @@
 import org.apache.shindig.gadgets.parse.nekohtml.NekoHtmlParser;
 import org.apache.shindig.gadgets.rewrite.XPathWrapper;
 
+import junit.framework.TestCase;
 import org.w3c.dom.Document;
 import org.w3c.dom.NodeList;
 
-import junit.framework.TestCase;
-
 /**
  * Note these tests are of marginal use. Consider removing. More useful tests 
would exercise
  * the capability of the parser to handle strange HTML.
@@ -65,19 +64,9 @@
     assertEquals("foo", wrapper.getValue("/html/body/div/@id"));
   }
 
-  public void testParseStringUnescapesProperly() throws Exception {
-    parseStringUnescapesProperly(nekoParser);
-  }
-
-  void parseStringUnescapesProperly(GadgetHtmlParser htmlParser) throws 
Exception {
-    Document doc = 
htmlParser.parseDom("&lt;content&amp;&apos;chrome&apos;&gt;");
-    XPathWrapper wrapper = new XPathWrapper(doc);
-    assertEquals("<content&'chrome'>", wrapper.getValue("/html/body"));
-  }
-
   public void testParseNestedContentWithNoCloseForBrAndHr() throws Exception {
     parseNestedContentWithNoCloseForBrAndHr(nekoParser);
-  }
+  }                     
 
   void parseNestedContentWithNoCloseForBrAndHr(GadgetHtmlParser htmlParser) 
throws Exception {
     Document doc = htmlParser.parseDom("<div>x and y<br> and <hr>z</div>");

Modified: 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java?rev=721058&r1=721057&r2=721058&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java
 Wed Nov 26 17:22:21 2008
@@ -69,14 +69,14 @@
     this.numRuns = 10;
     warmup = true;
     //runCaja();
-    //runNeko();
+    runNeko();
     runNekoSimple();
     //Sleep to let JIT kick in
     Thread.sleep(10000L);
-    this.numRuns = 50; //numRuns;
+    this.numRuns = numRuns;
     warmup = false;
     //runCaja();
-    //runNeko();
+    runNeko();
     runNekoSimple();
   }
 

Copied: 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java
 (from r721042, 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java)
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java?p2=incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java&p1=incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java&r1=721042&r2=721058&rev=721058&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParsersTest.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java
 Wed Nov 26 17:22:21 2008
@@ -17,19 +17,18 @@
  */
 package org.apache.shindig.gadgets.parse.nekohtml;
 
+import org.apache.commons.io.IOUtils;
 import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
 import org.apache.shindig.gadgets.parse.HtmlSerializer;
 import org.apache.shindig.gadgets.parse.ParseModule;
 
-import org.apache.commons.io.IOUtils;
-import org.w3c.dom.Document;
-
 import junit.framework.TestCase;
+import org.w3c.dom.Document;
 
 /**
- * Test behavior of simplified HTML parser
+ * Test behavior of neko based parser and serializers
  */
-public class NekoParsersTest extends TestCase {
+public class NekoParserAndSerializeTest extends TestCase {
 
   private NekoSimplifiedHtmlParser simple = new NekoSimplifiedHtmlParser(
         new ParseModule.DOMImplementationProvider().get());

Modified: 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/RenderingContentRewriterTest.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/RenderingContentRewriterTest.java?rev=721058&r1=721057&r2=721058&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/RenderingContentRewriterTest.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/RenderingContentRewriterTest.java
 Wed Nov 26 17:22:21 2008
@@ -18,32 +18,18 @@
  */
 package org.apache.shindig.gadgets.render;
 
-import static 
org.apache.shindig.gadgets.render.RenderingContentRewriter.DEFAULT_CSS;
-import static 
org.apache.shindig.gadgets.render.RenderingContentRewriter.FEATURES_KEY;
-import static 
org.apache.shindig.gadgets.render.RenderingContentRewriter.INSERT_BASE_ELEMENT_KEY;
-import static org.easymock.EasyMock.expect;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
 import org.apache.shindig.common.ContainerConfig;
 import org.apache.shindig.common.PropertiesModule;
 import org.apache.shindig.common.uri.Uri;
 import org.apache.shindig.common.xml.XmlUtil;
-import org.apache.shindig.gadgets.Gadget;
-import org.apache.shindig.gadgets.GadgetContext;
-import org.apache.shindig.gadgets.GadgetException;
-import org.apache.shindig.gadgets.GadgetFeature;
-import org.apache.shindig.gadgets.GadgetFeatureRegistry;
-import org.apache.shindig.gadgets.JsLibrary;
-import org.apache.shindig.gadgets.MessageBundleFactory;
-import org.apache.shindig.gadgets.UrlGenerator;
+import org.apache.shindig.gadgets.*;
 import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
 import org.apache.shindig.gadgets.parse.ParseModule;
 import org.apache.shindig.gadgets.preload.NullPreloads;
 import org.apache.shindig.gadgets.preload.PreloadException;
 import org.apache.shindig.gadgets.preload.PreloadedData;
 import org.apache.shindig.gadgets.preload.Preloads;
+import static org.apache.shindig.gadgets.render.RenderingContentRewriter.*;
 import org.apache.shindig.gadgets.rewrite.MutableContent;
 import org.apache.shindig.gadgets.spec.GadgetSpec;
 import org.apache.shindig.gadgets.spec.LocaleSpec;
@@ -56,21 +42,16 @@
 import com.google.common.collect.Sets;
 import com.google.inject.Guice;
 import com.google.inject.Injector;
-
+import static org.easymock.EasyMock.expect;
 import org.easymock.classextension.EasyMock;
 import org.easymock.classextension.IMocksControl;
 import org.json.JSONException;
 import org.json.JSONObject;
+import static org.junit.Assert.*;
 import org.junit.Before;
 import org.junit.Test;
 
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -150,7 +131,7 @@
 
   @Test
   public void completeDocument() throws Exception {
-    String docType = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 
Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\";>";
+    String docType = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 
Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\";>";
     String head = "<script src=\"foo.js\"></script><style 
type=\"text/css\">body{color:red;}</style>";
     String bodyAttr = " onload=\"foo();\"";
     String body = "hello, world.";

Modified: 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-expected.html
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-expected.html?rev=721058&r1=721057&r2=721058&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-expected.html
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-expected.html
 Wed Nov 26 17:22:21 2008
@@ -1,4 +1,4 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" 
"http://www.w3.org/TR/html4/loose.dtd";>
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" 
"http://www.w3.org/TR/html4/loose.dtd";>
 <html>
 <head id="head">
   <link href="http://www.example.org/css.css"; rel="stylesheet" type="text/css">
@@ -17,9 +17,9 @@
         <input type="hidden" value="something">
         <input type="text">
       </div>
-      <div>&lt;-- An unbalanced tag we dont care about --&gt;
+      <div><-- An unbalanced tag we dont care about -->
       <p>Some entities &amp;#x27;&quot;</p>
-      <p>Not a real entity &amp;fake;</p>
+      <p>Not a real entity &fake;</p>
     </div></form>
 
 </body></html>
\ No newline at end of file


Reply via email to