Author: awiner
Date: Tue Mar 17 18:54:19 2009
New Revision: 755349
URL: http://svn.apache.org/viewvc?rev=755349&view=rev
Log:
Add SanitizedRenderingContentRewriter.bypassSanitization() to allow trusted
rewriters (esp. template tag handlers) to generate content that would otherwise
be rejected (target attribute on links, OBJECT tags for Flash, etc.).
Fix SocialMarkupHtmlParser to generate comment nodes so that sanitization can
strip them.
Update the sanitizer test to use the default ParseModule (whose parser is now
SocialMarkupHtmlParser).
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParser.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriter.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriterTest.java
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java?rev=755349&r1=755348&r2=755349&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
Tue Mar 17 18:54:19 2009
@@ -272,7 +272,7 @@
/** Create an Element in the DOM for an important element */
private Element startImportantElement(QName qName, XMLAttributes
xmlAttributes) {
if (builder.length() > 0) {
-
elementStack.peek().appendChild(document.createTextNode(builder.toString()));
+ appendChild(document.createTextNode(builder.toString()));
builder.setLength(0);
}
@@ -292,7 +292,7 @@
element.setAttribute(xmlAttributes.getLocalName(i) ,
xmlAttributes.getValue(i));
}
}
- elementStack.peek().appendChild(element);
+ appendChild(element);
return element;
}
@@ -377,7 +377,7 @@
public void endDocument(Augmentations augs) throws XNIException {
if (builder.length() > 0) {
-
elementStack.peek().appendChild(document.createTextNode(builder.toString()));
+ appendChild(document.createTextNode(builder.toString()));
builder.setLength(0);
}
elementStack.pop();
@@ -389,5 +389,9 @@
public XMLDocumentSource getDocumentSource() {
return null;
}
+
+ protected final void appendChild(Node node) {
+ elementStack.peek().appendChild(node);
+ }
}
}
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParser.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParser.java?rev=755349&r1=755348&r2=755349&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParser.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParser.java
Tue Mar 17 18:54:19 2009
@@ -29,6 +29,7 @@
import org.cyberneko.html.HTMLConfiguration;
import org.cyberneko.html.HTMLScanner;
import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.Node;
import com.google.inject.Inject;
import com.google.inject.Singleton;
@@ -99,6 +100,15 @@
super.endElement(name, augs);
}
+
+ @Override
+ public void comment(XMLString text, Augmentations augs) throws
XNIException {
+ // Add comments as comment nodes - needed to support sanitization
+ // of SocialMarkup-parsed content
+ Node comment = getDocument().createComment(new String(text.ch,
text.offset, text.length));
+ appendChild(comment);
+ }
+
@Override
public void startElement(QName name, XMLAttributes xmlAttributes,
Augmentations augs)
throws XNIException {
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriter.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriter.java?rev=755349&r1=755348&r2=755349&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriter.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriter.java
Tue Mar 17 18:54:19 2009
@@ -76,19 +76,29 @@
private static final Set<String> URI_ATTRIBUTES = ImmutableSet.of("href",
"src");
- // Attributes to forcibly rewrite and require an image mime type
+ /** Key stored as element user-data to bypass sanitization */
+ private static final String BYPASS_SANITIZATION_KEY =
"shindig.bypassSanitization";
+
+ /** Attributes to forcibly rewrite and require an image mime type */
private static final Map<String, ImmutableSet<String>>
PROXY_IMAGE_ATTRIBUTES =
ImmutableMap.of("img", ImmutableSet.of("src"));
/**
- * Is the Gadget to be rendered sanitized
- * @param gadget
- * @return
+ * Is the Gadget to be rendered sanitized?
+ * @return true if sanitization will be enabled
*/
public static boolean isSanitizedRenderingRequest(Gadget gadget) {
return ("1".equals(gadget.getContext().getParameter("sanitize")));
}
-
+
+ /**
+ * Marks that an element - and all its attributes and children - are
+ * trusted content.
+ */
+ public static void bypassSanitization(Element element) {
+ element.setUserData(BYPASS_SANITIZATION_KEY, true, null);
+ }
+
private final Set<String> allowedTags;
private final Set<String> allowedAttributes;
private final CajaCssSanitizer cssSanitizer;
@@ -238,7 +248,9 @@
case Node.ELEMENT_NODE:
case Node.DOCUMENT_NODE:
Element element = (Element) node;
- if (allowedTags.contains(element.getTagName().toLowerCase())) {
+ if (canBypassSanitization(element)) {
+ return;
+ } else if (allowedTags.contains(element.getTagName().toLowerCase()))
{
// TODO - Add special case for stylesheet LINK nodes.
// Special case handling for style nodes
if (element.getTagName().equalsIgnoreCase("style")) {
@@ -301,6 +313,10 @@
return list;
}
+ private static boolean canBypassSanitization(Element element) {
+ return (element.getUserData(BYPASS_SANITIZATION_KEY) != null);
+ }
+
/** Convert a NamedNodeMap to a list for easy and safe operations */
private static List<Node> toList(NodeList nodes) {
List<Node> list = new ArrayList<Node>(nodes.getLength());
Modified:
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriterTest.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriterTest.java?rev=755349&r1=755348&r2=755349&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriterTest.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriterTest.java
Tue Mar 17 18:54:19 2009
@@ -18,6 +18,9 @@
*/
package org.apache.shindig.gadgets.render;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
import org.apache.commons.io.IOUtils;
import org.apache.shindig.common.PropertiesModule;
import org.apache.shindig.common.uri.Uri;
@@ -27,28 +30,18 @@
import org.apache.shindig.gadgets.http.HttpResponse;
import org.apache.shindig.gadgets.http.HttpResponseBuilder;
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
+import org.apache.shindig.gadgets.parse.ParseModule;
import org.apache.shindig.gadgets.parse.caja.CajaCssParser;
import org.apache.shindig.gadgets.parse.caja.CajaCssSanitizer;
-import org.apache.shindig.gadgets.parse.nekohtml.NekoHtmlParser;
import org.apache.shindig.gadgets.rewrite.ContentRewriter;
import org.apache.shindig.gadgets.rewrite.ContentRewriterFeatureFactory;
import org.apache.shindig.gadgets.rewrite.MutableContent;
import org.apache.shindig.gadgets.servlet.ProxyBase;
import org.apache.shindig.gadgets.spec.GadgetSpec;
-
-import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Sets;
-import com.google.inject.AbstractModule;
-import com.google.inject.Guice;
-import com.google.inject.Injector;
-import com.google.inject.Provider;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
import org.junit.Before;
import org.junit.Test;
-import org.w3c.dom.DOMImplementation;
-import org.w3c.dom.bootstrap.DOMImplementationRegistry;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
import java.util.Collections;
import java.util.HashSet;
@@ -56,6 +49,11 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
+import com.google.inject.Guice;
+import com.google.inject.Injector;
+
public class SanitizedRenderingContentRewriterTest {
private static final Set<String> DEFAULT_TAGS = ImmutableSet.of("html",
"head", "body");
private static final Pattern BODY_REGEX =
Pattern.compile(".*<body>(.*)</body>.*");
@@ -77,7 +75,7 @@
@Before
public void setUp() throws Exception {
- Injector injector = Guice.createInjector(new TestParseModule(), new
PropertiesModule());
+ Injector injector = Guice.createInjector(new ParseModule(), new
PropertiesModule());
parser = injector.getInstance(GadgetHtmlParser.class);
gadget = new Gadget().setContext(unsanitaryGadgetContext);
gadget.setSpec(new GadgetSpec(Uri.parse("www.example.org/gadget.xml"),
@@ -225,7 +223,28 @@
assertNull(rewrite(req, response));
}
- @Test
+ @Test
+ public void sanitizationBypassAllowed() {
+ String markup = "<p foo=\"bar\"><b>Parag</b><!--raph--></p>";
+ // Create a rewriter that would strip everything
+ ContentRewriter rewriter = createRewriter(set(), set());
+
+ MutableContent mc = new MutableContent(parser, markup);
+ Document document = mc.getDocument();
+ // Force the content to get re-serialized
+ MutableContent.notifyEdit(document);
+ String fullMarkup = mc.getContent();
+
+ Element paragraphTag = (Element)
document.getElementsByTagName("p").item(0);
+ // Mark the paragraph tag element as trusted
+ SanitizedRenderingContentRewriter.bypassSanitization(paragraphTag);
+ rewriter.rewrite(gadget, mc);
+
+ // The document should be unchanged
+ assertEquals(fullMarkup, mc.getContent());
+ }
+
+ @Test
public void restrictHrefAndSrcAttributes() {
String markup =
"<element " +
@@ -294,54 +313,4 @@
gadget.setCurrentView(gadget.getSpec().getViews().values().iterator().next());
assertEquals(sanitized, rewrite(gadget, markup, set("p", "b", "style"),
set()));
}
-
- private static class TestParseModule extends AbstractModule {
-
- @Override
- protected void configure() {
- bind(GadgetHtmlParser.class).to(NekoHtmlParser.class);
-
bind(DOMImplementation.class).toProvider(DOMImplementationProvider.class);
- }
-
- /**
- * Provider of new HTMLDocument implementations. Used to hide XML parser
weirdness
- */
- public static class DOMImplementationProvider implements
Provider<DOMImplementation> {
-
- DOMImplementation domImpl;
-
- public DOMImplementationProvider() {
- try {
- DOMImplementationRegistry registry =
DOMImplementationRegistry.newInstance();
- // Require the traversal API
- domImpl = registry.getDOMImplementation("XML 1.0 Traversal 2.0");
- } catch (Exception e) {
- // Try another
- }
- // This is ugly but effective
- try {
- if (domImpl == null) {
- domImpl = (DOMImplementation)
-
Class.forName("org.apache.xerces.internal.dom.DOMImplementationImpl").
- getMethod("getDOMImplementation").invoke(null);
- }
- } catch (Exception ex) {
- //try another
- }
- try {
- if (domImpl == null) {
- domImpl = (DOMImplementation)
-
Class.forName("com.sun.org.apache.xerces.internal.dom.DOMImplementationImpl").
- getMethod("getDOMImplementation").invoke(null);
- }
- } catch (Exception ex) {
- throw new RuntimeException("Could not find HTML DOM implementation",
ex);
- }
- }
-
- public DOMImplementation get() {
- return domImpl;
- }
- }
- }
}