hi all
 The following is some code that I use to index the contents of a table
 (there are 18746 records in the table).
 Using a database result set, I loop over all the records,
 creating a Document object for each one and indexing it into a RAMDirectory,
 and then onto the file system.

 When I open an IndexReader and output numDocs(), I get 18740.

 However, on running the same code but using an FSDirectory object, on
opening an IndexReader I get 18476.

 Has anyone else come across this behaviour? The JDK being used is 1.4.1.


public class JournalIndexer extends  JournalConstants {
    IndexWriter ramWriter ;
    Directory ramDirectory;
    String dir;
    public JournalIndexer(String dir) throws  Exception{
        this.dir = dir;
        ramDirectory = new RAMDirectory();
        ramWriter = new IndexWriter( ramDirectory, new SimpleAnalyzer()
,true );
    }

    public static void main(String args[]) throws Exception {
        Statement stmt   = connection.createStatement();
        JournalIndexer indexer = new JournalIndexer("journals");
         int main_counter = 0;
        // SELECT ID, JOURNALTITLE, NLM_ID, ISSN, MEDLINE_ABBREVIATION,
ISO_ABBREVIATION, ESSN "+
        ResultSet rs = stmt.executeQuery(sqlFetchJournals);
        while(rs.next() ){
            Journal journal = new Journal();
                ///set values
            main_counter++;
            indexer.add( journal );
        }
        indexer.close();
    }

    int count = 0;

    public void add(Journal journal) throws Exception {
        Document  j_doc = new Document();
       //Field(String name      , String    string, boolean store, boolean
index, boolean token)
        Field id     = new Field(ID,""+journal.getId(), true, true, false );
        j_doc.add( id );
        ramWriter.addDocument( j_doc );
         count++;

    }

    public void close() throws  Exception {
        IndexWriter fileWriter = new IndexWriter(
FSDirectory.getDirectory(dir,true), new SimpleAnalyzer(),true);
        Directory dirs[] = { ramDirectory };
        fileWriter.addIndexes( dirs );
        fileWriter.optimize();
        fileWriter.close();
    }

   class JournalAnalyzer extends Analyzer {
     public TokenStream tokenStream(String field,Reader reader)  {
        TokenStream result = new WhitespaceTokenizer(reader);
        result = new LowerCaseFilter(result);
        return  result;
     }
   }

}


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to