: Chris: What would you like to see vis-a-vis the many field issues before
: committing?  Should we put in a global lazy-field-disable option?

Yeah, a simple solrconfig option that lets you disable it completely is
probably a good idea (especially in light of LUCENE-683), and I don't see
any reason why we need any more complicated solution right now.

This is the microbenchmark I was working on when I discovered LUCENE-683.
I had to put a little hack in to ignore the last few docs when randomly
picking them, but besides that, in all of the different scenarios I tried,
I couldn't find one where re-fetching a document after it had already been
loaded with lazy fields was ever faster than just reusing the existing
doc (who knows if that will change after the bug gets fixed, though)...


package org.apache.lucene;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Random;
import java.util.Set;
import java.util.HashSet;
import java.util.List;
import java.util.Iterator;

import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;

import org.apache.lucene.store.*;
import org.apache.lucene.document.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.queryParser.*;

/**
 * Microbenchmark comparing two ways of serving repeated "hits" on a document
 * that was first loaded through a lazy {@link FieldSelector}:
 * <ul>
 *   <li>{@link #testLazy()} keeps reading fields from the lazily loaded
 *       document;</li>
 *   <li>{@link #testComplete()} re-fetches the complete document once and
 *       reads fields from that copy.</li>
 * </ul>
 * Both variants use the same random seed, so they visit the same sequence of
 * document ids. Timing is left to the test runner; the lettered
 * {@code testLazyA..D} / {@code testCompleteA..D} methods simply repeat the
 * two variants in alternation.
 *
 * <p>Sizes are configurable through the system properties
 * {@code bench.docs}, {@code bench.fields}, {@code bench.iters} and
 * {@code bench.hits}.
 */
public class TestLazyBenchmark extends TestCase {

  /** Seed shared by both benchmark variants so they pick identical doc ids. */
  public static int BASE_SEED = 13;

  /**
   * Reads an integer system property.
   *
   * @param n   name of the system property
   * @param def value to use when the property is not set
   * @return the parsed property value, or {@code def} if the property is unset
   * @throws NumberFormatException if the property is set but is not an integer
   */
  public static int getProp(String n, int def) {
    // parseInt avoids the needless boxing of Integer.valueOf(..).intValue()
    return Integer.parseInt(System.getProperty(n, String.valueOf(def)));
  }

  /** Number of documents written to the index. */
  public static int NUM_DOCS = getProp("bench.docs",2000);
  /** Number of stored fields ("f1".."fN") per document. */
  public static int NUM_FIELDS = getProp("bench.fields",100);
  /** Number of randomly chosen documents visited by each test method. */
  public static int NUM_ITERS = getProp("bench.iters",2000);
  /** Number of full "read every field" passes per visited document. */
  public static int NUM_HITS = getProp("bench.hits",1);


  /**
   * Workaround for a bug in lazy loading the last field of the last doc
   * (or maybe more): the last FUDGE documents are never picked.
   */
  public static int FUDGE = 5;

  /** Filler text used to build the field values. */
  private static String[] data = new String[] {
    "asdf qqwert lkj weroia lkjadsf kljsdfowq iero ",
    " 8432 lkj nadsf w3r9 lk 3r4 l,sdf 0werlk anm adsf rewr ",
    "lkjadf ;lkj kjlsa; aoi2winm lksa;93r lka adsfwr90 ",
    ";lkj ;lak -2-fdsaj w309r5 klasdfn ,dvoawo oiewf j;las;ldf w2 ",
    " ;lkjdsaf; kwe ;ladsfn [0924r52n ldsanf jt498ut5a nlkma oi49ut ",
    "lkj asd9u0942t ;lkndv moaiewjut 09sadlkf 43wt [j'sadnm at [ualknef ;a43 "
  };

  /**
   * Name of the single field that {@link #SELECTOR} loads eagerly.
   * Fields are numbered from 1, so the index is clamped to at least 1;
   * otherwise a bench.fields value below 3 would name the non-existent "f0".
   */
  private static String MAGIC_FIELD = "f" + Math.max(1, NUM_FIELDS / 3);

  /** Loads {@link #MAGIC_FIELD} eagerly and every other field lazily. */
  private static FieldSelector SELECTOR = new FieldSelector() {
      public FieldSelectorResult accept(String f) {
        if (f.equals(MAGIC_FIELD)) {
          return FieldSelectorResult.LOAD;
        }
        return FieldSelectorResult.LAZY_LOAD;
      }
    };

  /**
   * Prints the benchmark settings and builds an in-memory index of
   * {@link #NUM_DOCS} documents with {@link #NUM_FIELDS} stored, tokenized
   * fields each. Field values are generated from a fixed seed, so the index
   * content is the same on every run.
   *
   * @return the directory holding the freshly written index
   * @throws RuntimeException wrapping any failure raised while indexing
   */
  private static Directory makeIndex() {
    System.out.println("bench.docs   = " + NUM_DOCS);
    System.out.println("bench.fields = " + NUM_FIELDS);
    System.out.println("bench.iters  = " + NUM_ITERS);
    System.out.println("bench.hits   = " + NUM_HITS);

    Directory dir = new RAMDirectory();
    try {
      Random r = new Random(BASE_SEED + 42) ;
      Analyzer analyzer = new SimpleAnalyzer();
      IndexWriter writer = new IndexWriter(dir, analyzer, true);

      writer.setUseCompoundFile(false);

      for (int d = 1; d <= NUM_DOCS; d++) {
        Document doc = new Document();
        for (int f = 1; f <= NUM_FIELDS; f++ ) {
          // one deterministic chunk plus one pseudo-random chunk per field
          doc.add(new Field("f"+f,
                            data[f % data.length]
                            + data[r.nextInt(data.length)],
                            Field.Store.YES,
                            Field.Index.TOKENIZED));
        }
        writer.addDocument(doc);
      }
      writer.close();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    return dir;
  }

  /** Index shared by all test methods; built once when the class is loaded. */
  private static Directory DIR = makeIndex();

  /**
   * collector for field values in case JVM tries to optimize
   * away the field gets
   *
   * I'm probably being paranoid.
   */
  public static Set VALS = null;

  /**
   * Picks a random document id, skipping the last {@link #FUDGE} documents.
   *
   * @param r source of randomness
   * @return a doc id in the range [0, NUM_DOCS - FUDGE)
   * @throws IllegalArgumentException if NUM_DOCS is not greater than FUDGE
   */
  private static int randomDocId(Random r) {
    int candidates = NUM_DOCS - FUDGE;
    if (candidates <= 0) {
      // Random.nextInt would reject this anyway, but without saying why
      throw new IllegalArgumentException(
          "bench.docs (" + NUM_DOCS + ") must be greater than FUDGE ("
          + FUDGE + ")");
    }
    return r.nextInt(candidates);
  }

  /** Reads every field "f1".."fN" of the document into {@link #VALS}. */
  private static void collectAllFields(Document d) {
    for (int f = 1; f <= NUM_FIELDS; f++) {
      VALS.add(d.get("f"+f));
    }
  }

  /**
   * Loads each chosen document once through {@link #SELECTOR} and serves
   * all {@link #NUM_HITS} full reads from that same lazily loaded document.
   */
  public void testLazy() throws Exception {
    Random r = new Random(BASE_SEED);

    IndexReader reader = IndexReader.open(DIR);
    try {
      for (int i = 0; i < NUM_ITERS; i++) {
        VALS = new HashSet();
        int docId = randomDocId(r);

        // zero-th lazy hit
        Document d = reader.document(docId, SELECTOR);
        VALS.add(d.get(MAGIC_FIELD));

        // remaining full hits, reuse doc
        for (int h = 1; h <= NUM_HITS; h++) {
          collectAllFields(d);
        }
        VALS = null;
      }
    } finally {
      // release the reader even when a fetch fails part-way through
      reader.close();
    }
  }

  /**
   * Loads each chosen document once through {@link #SELECTOR}, then fetches
   * the complete document and serves the full reads from that copy. The
   * first full read always happens; the remaining NUM_HITS - 1 reuse it.
   */
  public void testComplete() throws Exception {
    Random r = new Random(BASE_SEED);

    IndexReader reader = IndexReader.open(DIR);
    try {
      for (int i = 0; i < NUM_ITERS; i++) {
        VALS = new HashSet();
        int docId = randomDocId(r);

        // zero-th lazy hit
        Document d = reader.document(docId, SELECTOR);
        VALS.add(d.get(MAGIC_FIELD));

        // first full hit, fetch complete document
        d = reader.document(docId);
        collectAllFields(d);

        // remaining hits
        for (int h = 2; h <= NUM_HITS; h++) {
          collectAllFields(d);
        }
        VALS = null;
      }
    } finally {
      // release the reader even when a fetch fails part-way through
      reader.close();
    }
  }

  // Repeated, interleaved runs of both variants.

  public void testLazyA() throws Exception { testLazy(); }
  public void testCompleteA() throws Exception { testComplete(); }

  public void testLazyB() throws Exception { testLazy(); }
  public void testCompleteB() throws Exception { testComplete(); }

  public void testLazyC() throws Exception { testLazy(); }
  public void testCompleteC() throws Exception { testComplete(); }

  public void testLazyD() throws Exception { testLazy(); }
  public void testCompleteD() throws Exception { testComplete(); }

}

Reply via email to