Hi everybody,
I'm using Lucene 3.6 to index Wikipedia documents, which amount to over 3 million
articles. The data is in a MySQL database, and indexing has taken more than 24 hours so
far. Do you know any tips that can speed up the indexing process?
Here is my code:
public static void main(String[] args) {             String indexPath = 
INDEXPATH;           IndexWriter writer = null;       DatabaseConfiguration 
dbConfig = new DatabaseConfiguration();           dbConfig.setHost(host);       
  dbConfig.setDatabase(data);             dbConfig.setUser(user);         
dbConfig.setPassword(password);         dbConfig.setLanguage(Language.english);
                  try {           Directory dir = FSDirectory.open(new 
File(indexPath));                  Analyzer analyzer = new 
StandardAnalyzer(Version.LUCENE_31);        IndexWriterConfig iwc = new 
IndexWriterConfig(Version.LUCENE_31, analyzer);             
iwc.setOpenMode(OpenMode.CREATE);       writer = new IndexWriter(dir, iwc);     
                    }               catch (IOException e) {                     
System.out.println(" caught a " + e.getClass() +                 "\n with 
message: " + e.getMessage());               }                             try { 
                        Wikipedia wiki = new Wikipedia(dbConfig);               
                Iterable<Page> wikipages = wiki.getPages(); //get wikipedia 
articles from the database                          Iterator iter = 
wikipages.iterator();                           while(iter.hasNext()){          
                Page p = (Page)iter.next();                             
System.out.println(p.getTitle().getPlainTitle());                               
    Document doc = new Document();                                  Field 
contentField = new Field("contents", p.getPlainText(), Field.Store.NO, 
Field.Index.ANALYZED);                             Field titleField = new 
Field("title", p.getTitle().getPlainTitle(),Field.Store.YES, 
Field.Index.NOT_ANALYZED );                                 
doc.add(contentField); // wiki page text                                
doc.add(titleField); // wiki page title                                 
writer.addDocument(doc);                            }                       } 
catch (Exception e) {                         e.printStackTrace();              
      }                                 }
                                          

Reply via email to