Author: lryan
Date: Tue Nov 11 11:24:14 2008
New Revision: 713121

URL: http://svn.apache.org/viewvc?rev=713121&view=rev
Log:
Add support for caching parsed HTML documents in memory to speed up the 
rewriter (off by default).
Make the cache expose its capacity as a property to avoid key-generation 
costs when the cache has no capacity.

Modified:
    
incubator/shindig/trunk/java/common/src/main/bundle/org/apache/shindig/common/cache/ehcache/ehcacheConfig.xml
    
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/Cache.java
    
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/LruCache.java
    
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/ehcache/EhConfiguredCache.java
    
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java

Modified: 
incubator/shindig/trunk/java/common/src/main/bundle/org/apache/shindig/common/cache/ehcache/ehcacheConfig.xml
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/common/src/main/bundle/org/apache/shindig/common/cache/ehcache/ehcacheConfig.xml?rev=713121&r1=713120&r2=713121&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/common/src/main/bundle/org/apache/shindig/common/cache/ehcache/ehcacheConfig.xml
 (original)
+++ 
incubator/shindig/trunk/java/common/src/main/bundle/org/apache/shindig/common/cache/ehcache/ehcacheConfig.xml
 Tue Nov 11 11:24:14 2008
@@ -35,6 +35,14 @@
     diskPersistent="false"
     memoryStoreEvictionPolicy="LFU"/>
 
+  <!-- By default do not cache any parsed documents. This is experimental -->
+  <cache name="parsedDocuments"
+    maxElementsInMemory="0"
+    eternal="true"
+    overflowToDisk="false"
+    diskPersistent="false"
+    memoryStoreEvictionPolicy="LFU"/>
+
   <!-- 
     This configuration is only suitable for a modest sized HTTP cache.
     You should configure a shared cache for production use.

Modified: 
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/Cache.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/Cache.java?rev=713121&r1=713120&r2=713121&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/Cache.java
 (original)
+++ 
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/Cache.java
 Tue Nov 11 11:24:14 2008
@@ -43,4 +43,12 @@
    * @return The entry stored under the given key, or null if it doesn't exist.
    */
   public V removeElement(K key);
+
+  /**
+   * Returns the capacity of the cache.
+   *
+   * @return a positive integer indicating the upper bound on the number of 
allowed elements
+   * in the cache, -1 signifies that the capacity is unbounded
+   */
+  public long getCapacity();
 }

Modified: 
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/LruCache.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/LruCache.java?rev=713121&r1=713120&r2=713121&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/LruCache.java
 (original)
+++ 
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/LruCache.java
 Tue Nov 11 11:24:14 2008
@@ -44,6 +44,10 @@
     return super.remove(key);
   }
 
+  public long getCapacity() {
+    return capacity;
+  }
+
   @Override
   protected synchronized boolean removeEldestEntry(Map.Entry<K, V> eldest) {
     return size() > capacity;

Modified: 
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/ehcache/EhConfiguredCache.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/ehcache/EhConfiguredCache.java?rev=713121&r1=713120&r2=713121&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/ehcache/EhConfiguredCache.java
 (original)
+++ 
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/ehcache/EhConfiguredCache.java
 Tue Nov 11 11:24:14 2008
@@ -82,4 +82,13 @@
     return (V) value;
   }
 
+  /*
+   * (non-Javadoc)
+   *
+   * @see org.apache.shindig.common.cache.Cache#getCapacity()
+   */
+  public long getCapacity() {
+    return cache.getCacheConfiguration().getMaxElementsInMemory() +
+        cache.getCacheConfiguration().getMaxElementsOnDisk();
+  }
 }

Modified: 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java?rev=713121&r1=713120&r2=713121&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
 Tue Nov 11 11:24:14 2008
@@ -17,24 +17,31 @@
  */
 package org.apache.shindig.gadgets.parse;
 
+import org.apache.shindig.common.cache.Cache;
+import org.apache.shindig.common.cache.CacheProvider;
+import org.apache.shindig.common.util.HashUtil;
 import org.apache.shindig.gadgets.GadgetException;
 import org.apache.shindig.gadgets.parse.nekohtml.NekoSimplifiedHtmlParser;
 
 import com.google.inject.ImplementedBy;
-
+import com.google.inject.Inject;
 import org.w3c.dom.Document;
 
 /**
- * Parser for arbitrary HTML content. The content may simply be a
- * fragment or snippet of HTML rather than a fully-structured Document,
- * so the interface returns a list of {@link ParsedHtmlNode} objects
- * rather than a single top-level item.
- * 
- * {@link ParsedHtmlNode} for parsing details
+ * Parser for arbitrary HTML content
  */
 @ImplementedBy(NekoSimplifiedHtmlParser.class)
 public abstract class GadgetHtmlParser {
 
+  public static final String PARSED_DOUCMENTS = "parsedDocuments";
+
+  private Cache<String, Document> documentCache;
+
+  @Inject
+  public void setCacheProvider(CacheProvider cacheProvider) {
+    documentCache = cacheProvider.createCache(PARSED_DOUCMENTS);
+  }
+
   /**
    * @param content
    * @return true if we detect a preamble of doctype or html
@@ -45,17 +52,40 @@
   }
 
   public final Document parseDom(String source) throws GadgetException {
-    Document document = parseDomImpl(source);
-    // Ensure head tag exists
-    if (DomUtil.getFirstNamedChildNode(document.getDocumentElement(), "head") 
== null) {
-      // Add as first element
-      document.getDocumentElement().insertBefore(
-          document.createElement("head"),
-          document.getDocumentElement().getFirstChild());
+    Document document = null;
+    String key = null;  
+    // Avoid checksum overhead if we aren't caching
+    boolean shouldCache = shouldCache();
+    if (shouldCache) {
+      // TODO - Consider using the source if it's under a certain size
+      key = HashUtil.rawChecksum(source.getBytes());
+      document = documentCache.getElement(key);
+    }
+    if (document == null) {
+      document = parseDomImpl(source);
+      // Ensure head tag exists
+      if (DomUtil.getFirstNamedChildNode(document.getDocumentElement(), 
"head") == null) {
+        // Add as first element
+        document.getDocumentElement().insertBefore(
+            document.createElement("head"),
+            document.getDocumentElement().getFirstChild());
+      }
+      if (shouldCache) {
+        documentCache.addElement(key, document);
+      }
+    }
+    if (shouldCache) {
+      Document copy = (Document)document.cloneNode(true);
+      HtmlSerializer.copySerializer(document, copy);
+      return copy;
     }
     return document;
   }
 
+  private boolean shouldCache() {
+    return documentCache != null && documentCache.getCapacity() != 0;
+  }
+
   /**
    * @param source
    * @return a parsed document or document fragment


Reply via email to