Author: siren Date: Sat Dec 9 14:27:07 2006 New Revision: 485076 URL: http://svn.apache.org/viewvc?view=rev&rev=485076 Log: Optimize SpellCheckedMetadata further by taking into account the fact that it is used only for http-headers.
I am starting to believe that spellchecking should just be a utility method used by http protocol plugins. Modified: lucene/nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java Modified: lucene/nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java?view=diff&rev=485076&r1=485075&r2=485076 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java Sat Dec 9 14:27:07 2006 @@ -25,10 +25,9 @@ /** * A decorator to Metadata that adds spellchecking capabilities to property - * names. - * - * All the static String fields declared by this class are used as reference - * names for syntax correction on meta-data naming. + * names. Currently used spelling vocabulary contains just the httpheaders from + * {@link HttpHeaders} class. + * */ public class SpellCheckedMetadata extends Metadata { @@ -49,18 +48,23 @@ */ private static String[] normalized = null; - // Uses self introspection to fill the metanames index and the - // metanames list. static { - for (Field field : SpellCheckedMetadata.class.getFields()) { - int mods = field.getModifiers(); - if (Modifier.isFinal(mods) && Modifier.isPublic(mods) - && Modifier.isStatic(mods) && field.getType().equals(String.class)) { - try { - String val = (String) field.get(null); - NAMES_IDX.put(normalize(val), val); - } catch (Exception e) { - // Simply ignore... + + // Uses following array to fill the metanames index and the + // metanames list. 
+ Class[] spellthese = {HttpHeaders.class}; + + for (Class spellCheckedNames : spellthese) { + for (Field field : spellCheckedNames.getFields()) { + int mods = field.getModifiers(); + if (Modifier.isFinal(mods) && Modifier.isPublic(mods) + && Modifier.isStatic(mods) && field.getType().equals(String.class)) { + try { + String val = (String) field.get(null); + NAMES_IDX.put(normalize(val), val); + } catch (Exception e) { + // Simply ignore... + } } } } @@ -125,8 +129,7 @@ @Override public void add(final String name, final String value) { - String normalized = getNormalizedName(name); - super.add(normalized, value); + super.add(getNormalizedName(name), value); } @Override Modified: lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java?view=diff&rev=485076&r1=485075&r2=485076 ============================================================================== --- lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java (original) +++ lucene/nutch/trunk/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java Sat Dec 9 14:27:07 2006 @@ -36,6 +36,8 @@ */ public class TestSpellCheckedMetadata extends TestCase { + private static final int NUM_ITERATIONS = 10000; + public TestSpellCheckedMetadata(String testName) { super(testName); } @@ -63,7 +65,7 @@ assertEquals("Content-Type", SpellCheckedMetadata .getNormalizedName("contntype")); } - + /** Test for the <code>add(String, String)</code> method. 
*/ public void testAdd() { String[] values = null; @@ -237,18 +239,35 @@ assertEquals(0, result.size()); meta.add("name-one", "value-1.1"); result = writeRead(meta); + meta.add("Contenttype", "text/html"); assertEquals(1, result.size()); assertEquals(1, result.getValues("name-one").length); assertEquals("value-1.1", result.get("name-one")); meta.add("name-two", "value-2.1"); meta.add("name-two", "value-2.2"); result = writeRead(meta); - assertEquals(2, result.size()); + assertEquals(3, result.size()); assertEquals(1, result.getValues("name-one").length); assertEquals("value-1.1", result.getValues("name-one")[0]); assertEquals(2, result.getValues("name-two").length); assertEquals("value-2.1", result.getValues("name-two")[0]); assertEquals("value-2.2", result.getValues("name-two")[1]); + assertEquals("text/html", result.get(Metadata.CONTENT_TYPE)); + } + + /** + * IO Test method, usable only when you plan to do changes in metadata + * to measure relative performance impact. + */ + public final void testHandlingSpeed() { + SpellCheckedMetadata result; + long start = System.currentTimeMillis(); + for (int i = 0; i < NUM_ITERATIONS; i++) { + SpellCheckedMetadata scmd = constructSpellCheckedMetadata(); + result = writeRead(scmd); + } + System.out.println(NUM_ITERATIONS + " spellchecked metadata I/O time:" + + (System.currentTimeMillis() - start) + "ms."); } private SpellCheckedMetadata writeRead(SpellCheckedMetadata meta) { @@ -262,6 +281,24 @@ fail(ioe.toString()); } return readed; + } + + /** + * Assembles a Spellchecked metadata Object. 
+ */ + public static final SpellCheckedMetadata constructSpellCheckedMetadata() { + SpellCheckedMetadata scmd = new SpellCheckedMetadata(); + scmd.add("Content-type", "foo/bar"); + scmd.add("Connection", "close"); + scmd.add("Last-Modified", "Sat, 09 Dec 2006 15:09:57 GMT"); + scmd.add("Server", "Foobar"); + scmd.add("Date", "Sat, 09 Dec 2006 18:07:20 GMT"); + scmd.add("Accept-Ranges", "bytes"); + scmd.add("ETag", "\"1234567-89-01234567\""); + scmd.add("Content-Length", "123"); + scmd.add(Nutch.SEGMENT_NAME_KEY, "segmentzzz"); + scmd.add(Nutch.SIGNATURE_KEY, "123"); + return scmd; } }