Author: awiner
Date: Fri Mar  6 00:44:48 2009
New Revision: 750717

URL: http://svn.apache.org/viewvc?rev=750717&view=rev
Log:
Updated URL-escaping code for the neko parser to avoid unnecessary, expensive
allocations.
URL escaping is now performed only for "src" and "href" attributes on
non-namespaced elements.

Modified:
    
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
    
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java

Modified: 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java?rev=750717&r1=750716&r2=750717&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
 Fri Mar  6 00:44:48 2009
@@ -19,6 +19,7 @@
 package org.apache.shindig.gadgets.parse.nekohtml;
 
 import org.apache.shindig.gadgets.parse.HtmlSerializer;
+import org.apache.xerces.xni.QName;
 import org.cyberneko.html.HTMLElements;
 import org.cyberneko.html.HTMLEntities;
 import org.w3c.dom.Attr;
@@ -31,7 +32,9 @@
 
 import java.io.IOException;
 import java.io.StringWriter;
-import java.net.URL;
+import java.util.Set;
+
+import com.google.common.collect.ImmutableSet;
 
 /**
  * This parser does not try to escape entities in text content as it expects the parser
@@ -39,6 +42,8 @@
  */
 public class NekoSerializer extends HtmlSerializer
 {
+  private static final Set<String> URL_ATTRIBUTES = ImmutableSet.of("href", "src");
+  
   public NekoSerializer() {
   }
 
@@ -137,26 +142,21 @@
     NamedNodeMap attributes = elem.getAttributes();
     for (int i = 0; i < attributes.getLength(); i++) {
       Attr attr = (Attr)attributes.item(i);
-      output.append(' ').append(attr.getNodeName());
+      String attrName = attr.getNodeName();
+      output.append(' ').append(attrName);
       if (attr.getNodeValue() != null &&
           attr.getNodeValue().length() > 0) {
         output.append("=\"");
-        printAttributeValue(attr.getNodeValue(), output);
+        boolean isUrlAttribute =
+          elem.getNamespaceURI() == null && URL_ATTRIBUTES.contains(attrName);
+        printAttributeValue(attr.getNodeValue(), output, isUrlAttribute);
         output.append('"');
       }
     }
     output.append(withXmlClose ? "/>" : ">");
   }
 
-  public static void printAttributeValue(String text, Appendable output) throws IOException {
-    boolean isUrl = false;
-    try {
-        new URL(text);
-        isUrl = true;
-    } catch (Exception e) {
-        // nop
-    }
-
+  public static void printAttributeValue(String text, Appendable output, boolean isUrl) throws IOException {
     int length = text.length();
     for (int j = 0; j < length; j++) {
       char c = text.charAt(j);
@@ -181,4 +181,11 @@
       }
     }
   }
+
+  /**
+   * Returns true if the listed attribute is a URL attribute.
+   */
+  public static boolean isUrlAttribute(QName name, String attributeName) {
+    return name.uri == null && URL_ATTRIBUTES.contains(attributeName);
+  }
 }

Modified: 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java?rev=750717&r1=750716&r2=750717&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
 Fri Mar  6 00:44:48 2009
@@ -17,12 +17,6 @@
  */
 package org.apache.shindig.gadgets.parse.nekohtml;
 
-import java.io.IOException;
-import java.io.StringReader;
-import java.net.URL;
-import java.util.Set;
-import java.util.Stack;
-
 import org.apache.shindig.gadgets.GadgetException;
 import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
 import org.apache.shindig.gadgets.parse.HtmlSerializer;
@@ -49,6 +43,11 @@
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
 
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.Set;
+import java.util.Stack;
+
 import com.google.common.collect.ImmutableSet;
 import com.google.inject.Inject;
 import com.google.inject.Singleton;
@@ -261,8 +260,10 @@
     private void startUnimportantElement(QName qName, XMLAttributes xmlAttributes) {
       builder.append('<').append(qName.rawname);
       for (int i = 0; i < xmlAttributes.getLength(); i++) {
-        builder.append(' ').append(xmlAttributes.getLocalName(i)).append("=\"");
-        appendAttributeValue(xmlAttributes.getValue(i));
+        String attributeName = xmlAttributes.getLocalName(i);
+        builder.append(' ').append(attributeName).append("=\"");
+        appendAttributeValue(xmlAttributes.getValue(i),
+            NekoSerializer.isUrlAttribute(qName, attributeName));
         builder.append('\"');
       }
       builder.append('>');
@@ -295,15 +296,7 @@
       return element;
     }
 
-    private void appendAttributeValue(String text) {
-      boolean isUrl = false;
-      try {
-        new URL(text);
-        isUrl = true;
-      } catch (Exception e) {
-        // nop
-      }
-
+    private void appendAttributeValue(String text, boolean isUrl) {
       for (int i = 0; i < text.length(); i++) {
         char c = text.charAt(i);
         if (c == '"') {


Reply via email to