Author: lryan
Date: Tue Nov 11 11:24:14 2008
New Revision: 713121
URL: http://svn.apache.org/viewvc?rev=713121&view=rev
Log:
Add support for caching parsed HTML documents in memory to speed up rewriter
(off by default)
Make the cache expose its capacity as a property to avoid key generation costs
when the cache has no capacity
Modified:
incubator/shindig/trunk/java/common/src/main/bundle/org/apache/shindig/common/cache/ehcache/ehcacheConfig.xml
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/Cache.java
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/LruCache.java
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/ehcache/EhConfiguredCache.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
Modified:
incubator/shindig/trunk/java/common/src/main/bundle/org/apache/shindig/common/cache/ehcache/ehcacheConfig.xml
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/common/src/main/bundle/org/apache/shindig/common/cache/ehcache/ehcacheConfig.xml?rev=713121&r1=713120&r2=713121&view=diff
==============================================================================
---
incubator/shindig/trunk/java/common/src/main/bundle/org/apache/shindig/common/cache/ehcache/ehcacheConfig.xml
(original)
+++
incubator/shindig/trunk/java/common/src/main/bundle/org/apache/shindig/common/cache/ehcache/ehcacheConfig.xml
Tue Nov 11 11:24:14 2008
@@ -35,6 +35,14 @@
diskPersistent="false"
memoryStoreEvictionPolicy="LFU"/>
+ <!-- By default do not cache any parsed documents. This is experimental -->
+ <cache name="parsedDocuments"
+ maxElementsInMemory="0"
+ eternal="true"
+ overflowToDisk="false"
+ diskPersistent="false"
+ memoryStoreEvictionPolicy="LFU"/>
+
<!--
This configuration is only suitable for a modest sized HTTP cache.
You should configure a shared cache for production use.
Modified:
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/Cache.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/Cache.java?rev=713121&r1=713120&r2=713121&view=diff
==============================================================================
---
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/Cache.java
(original)
+++
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/Cache.java
Tue Nov 11 11:24:14 2008
@@ -43,4 +43,12 @@
* @return The entry stored under the given key, or null if it doesn't exist.
*/
public V removeElement(K key);
+
+ /**
+ * Returns the capacity of the cache.
+ *
+ * @return a positive integer indicating the upper bound on the number of allowed elements
+ * in the cache, -1 signifies that the capacity is unbounded
+ */
+ public long getCapacity();
}
Modified:
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/LruCache.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/LruCache.java?rev=713121&r1=713120&r2=713121&view=diff
==============================================================================
---
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/LruCache.java
(original)
+++
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/LruCache.java
Tue Nov 11 11:24:14 2008
@@ -44,6 +44,10 @@
return super.remove(key);
}
+ public long getCapacity() {
+ return capacity;
+ }
+
@Override
protected synchronized boolean removeEldestEntry(Map.Entry<K, V> eldest) {
return size() > capacity;
Modified:
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/ehcache/EhConfiguredCache.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/ehcache/EhConfiguredCache.java?rev=713121&r1=713120&r2=713121&view=diff
==============================================================================
---
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/ehcache/EhConfiguredCache.java
(original)
+++
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/ehcache/EhConfiguredCache.java
Tue Nov 11 11:24:14 2008
@@ -82,4 +82,13 @@
return (V) value;
}
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.shindig.common.cache.Cache#getCapacity()
+ */
+ public long getCapacity() {
+ return cache.getCacheConfiguration().getMaxElementsInMemory() +
+ cache.getCacheConfiguration().getMaxElementsOnDisk();
+ }
}
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java?rev=713121&r1=713120&r2=713121&view=diff
==============================================================================
---
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
(original)
+++
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
Tue Nov 11 11:24:14 2008
@@ -17,24 +17,31 @@
*/
package org.apache.shindig.gadgets.parse;
+import org.apache.shindig.common.cache.Cache;
+import org.apache.shindig.common.cache.CacheProvider;
+import org.apache.shindig.common.util.HashUtil;
import org.apache.shindig.gadgets.GadgetException;
import org.apache.shindig.gadgets.parse.nekohtml.NekoSimplifiedHtmlParser;
import com.google.inject.ImplementedBy;
-
+import com.google.inject.Inject;
import org.w3c.dom.Document;
/**
- * Parser for arbitrary HTML content. The content may simply be a
- * fragment or snippet of HTML rather than a fully-structured Document,
- * so the interface returns a list of {@link ParsedHtmlNode} objects
- * rather than a single top-level item.
- *
- * {@link ParsedHtmlNode} for parsing details
+ * Parser for arbitrary HTML content
*/
@ImplementedBy(NekoSimplifiedHtmlParser.class)
public abstract class GadgetHtmlParser {
+ public static final String PARSED_DOUCMENTS = "parsedDocuments";
+
+ private Cache<String, Document> documentCache;
+
+ @Inject
+ public void setCacheProvider(CacheProvider cacheProvider) {
+ documentCache = cacheProvider.createCache(PARSED_DOUCMENTS);
+ }
+
/**
* @param content
* @return true if we detect a preamble of doctype or html
@@ -45,17 +52,40 @@
}
public final Document parseDom(String source) throws GadgetException {
- Document document = parseDomImpl(source);
- // Ensure head tag exists
- if (DomUtil.getFirstNamedChildNode(document.getDocumentElement(), "head")
== null) {
- // Add as first element
- document.getDocumentElement().insertBefore(
- document.createElement("head"),
- document.getDocumentElement().getFirstChild());
+ Document document = null;
+ String key = null;
+ // Avoid checksum overhead if we aren't caching
+ boolean shouldCache = shouldCache();
+ if (shouldCache) {
+ // TODO - Consider using the source if it's under a certain size
+ key = HashUtil.rawChecksum(source.getBytes());
+ document = documentCache.getElement(key);
+ }
+ if (document == null) {
+ document = parseDomImpl(source);
+ // Ensure head tag exists
+ if (DomUtil.getFirstNamedChildNode(document.getDocumentElement(),
"head") == null) {
+ // Add as first element
+ document.getDocumentElement().insertBefore(
+ document.createElement("head"),
+ document.getDocumentElement().getFirstChild());
+ }
+ if (shouldCache) {
+ documentCache.addElement(key, document);
+ }
+ }
+ if (shouldCache) {
+ Document copy = (Document)document.cloneNode(true);
+ HtmlSerializer.copySerializer(document, copy);
+ return copy;
}
return document;
}
+ private boolean shouldCache() {
+ return documentCache != null && documentCache.getCapacity() != 0;
+ }
+
/**
* @param source
* @return a parsed document or document fragment