Author: siren
Date: Sat Dec  9 14:27:07 2006
New Revision: 485076

URL: http://svn.apache.org/viewvc?view=rev&rev=485076
Log:
Optimize SpellCheckedMetadata further by taking into account the fact that it 
is used only for http-headers.

I am starting to believe that spellchecking should just be a utility method 
used by http protocol plugins.

Modified:
    
lucene/nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java
    
lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java

Modified: 
lucene/nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java?view=diff&rev=485076&r1=485075&r2=485076
==============================================================================
--- 
lucene/nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java 
(original)
+++ 
lucene/nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java 
Sat Dec  9 14:27:07 2006
@@ -25,10 +25,9 @@
 
 /**
  * A decorator to Metadata that adds spellchecking capabilities to property
- * names.
- *
- * All the static String fields declared by this class are used as reference
- * names for syntax correction on meta-data naming.
+ * names. Currently used spelling vocabulary contains just the httpheaders from
+ * {@link HttpHeaders} class.
+ * 
  */
 public class SpellCheckedMetadata extends Metadata {
 
@@ -49,18 +48,23 @@
    */
   private static String[] normalized = null;
 
-  // Uses self introspection to fill the metanames index and the
-  // metanames list.
   static {
-    for (Field field : SpellCheckedMetadata.class.getFields()) {
-      int mods = field.getModifiers();
-      if (Modifier.isFinal(mods) && Modifier.isPublic(mods)
-          && Modifier.isStatic(mods) && field.getType().equals(String.class)) {
-        try {
-          String val = (String) field.get(null);
-          NAMES_IDX.put(normalize(val), val);
-        } catch (Exception e) {
-          // Simply ignore...
+
+    // Uses following array to fill the metanames index and the
+    // metanames list.
+    Class[] spellthese = {HttpHeaders.class};
+
+    for (Class spellCheckedNames : spellthese) {
+      for (Field field : spellCheckedNames.getFields()) {
+        int mods = field.getModifiers();
+        if (Modifier.isFinal(mods) && Modifier.isPublic(mods)
+            && Modifier.isStatic(mods) && 
field.getType().equals(String.class)) {
+          try {
+            String val = (String) field.get(null);
+            NAMES_IDX.put(normalize(val), val);
+          } catch (Exception e) {
+            // Simply ignore...
+          }
         }
       }
     }
@@ -125,8 +129,7 @@
 
   @Override
   public void add(final String name, final String value) {
-    String normalized = getNormalizedName(name);
-    super.add(normalized, value);
+    super.add(getNormalizedName(name), value);
   }
 
   @Override

Modified: 
lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java?view=diff&rev=485076&r1=485075&r2=485076
==============================================================================
--- 
lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java
 (original)
+++ 
lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java
 Sat Dec  9 14:27:07 2006
@@ -36,6 +36,8 @@
  */
 public class TestSpellCheckedMetadata extends TestCase {
 
+  private static final int NUM_ITERATIONS = 10000;
+
   public TestSpellCheckedMetadata(String testName) {
     super(testName);
   }
@@ -63,7 +65,7 @@
     assertEquals("Content-Type", SpellCheckedMetadata
         .getNormalizedName("contntype"));
   }
-
+  
   /** Test for the <code>add(String, String)</code> method. */
   public void testAdd() {
     String[] values = null;
@@ -237,18 +239,35 @@
     assertEquals(0, result.size());
     meta.add("name-one", "value-1.1");
     result = writeRead(meta);
+    meta.add("Contenttype", "text/html");
     assertEquals(1, result.size());
     assertEquals(1, result.getValues("name-one").length);
     assertEquals("value-1.1", result.get("name-one"));
     meta.add("name-two", "value-2.1");
     meta.add("name-two", "value-2.2");
     result = writeRead(meta);
-    assertEquals(2, result.size());
+    assertEquals(3, result.size());
     assertEquals(1, result.getValues("name-one").length);
     assertEquals("value-1.1", result.getValues("name-one")[0]);
     assertEquals(2, result.getValues("name-two").length);
     assertEquals("value-2.1", result.getValues("name-two")[0]);
     assertEquals("value-2.2", result.getValues("name-two")[1]);
+    assertEquals("text/html", result.get(Metadata.CONTENT_TYPE));
+  }
+
+  /**
+   * IO Test method, usable only when you plan to do changes in metadata
+   * to measure relative performance impact.
+   */
+  public final void testHandlingSpeed() {
+    SpellCheckedMetadata result;
+    long start = System.currentTimeMillis();
+    for (int i = 0; i < NUM_ITERATIONS; i++) {
+      SpellCheckedMetadata scmd = constructSpellCheckedMetadata();
+      result = writeRead(scmd);
+    }
+    System.out.println(NUM_ITERATIONS + " spellchecked metadata I/O time:"
+        + (System.currentTimeMillis() - start) + "ms.");
   }
 
   private SpellCheckedMetadata writeRead(SpellCheckedMetadata meta) {
@@ -262,6 +281,24 @@
       fail(ioe.toString());
     }
     return readed;
+  }
+
+  /**
+   * Assembles a Spellchecked metadata Object.
+   */
+  public static final SpellCheckedMetadata constructSpellCheckedMetadata() {
+    SpellCheckedMetadata scmd = new SpellCheckedMetadata();
+    scmd.add("Content-type", "foo/bar");
+    scmd.add("Connection", "close");
+    scmd.add("Last-Modified", "Sat, 09 Dec 2006 15:09:57 GMT");
+    scmd.add("Server", "Foobar");
+    scmd.add("Date", "Sat, 09 Dec 2006 18:07:20 GMT");
+    scmd.add("Accept-Ranges", "bytes");
+    scmd.add("ETag", "\"1234567-89-01234567\"");
+    scmd.add("Content-Length", "123");
+    scmd.add(Nutch.SEGMENT_NAME_KEY, "segmentzzz");
+    scmd.add(Nutch.SIGNATURE_KEY, "123");
+    return scmd;
   }
 
 }


Reply via email to