Lucene Spell Index Not giving Proper Result -------------------------------------------
Key: LUCENE-827 URL: https://issues.apache.org/jira/browse/LUCENE-827 Project: Lucene - Java Issue Type: Bug Environment: Windows XP, Linux Reporter: Gaurav Gupta I am passing List of words 'Mayur Vihar Center Circle Udyog Vihar Noida Gurgaon' to create spell index from Lucene Index. when i searches for correct word for 'Centrer' i.e 'Center', it does'nt find it. I checked it whether its there in spell Index, i didnt find it there. By making the spell Index directly from Plain text Dictionary gives me the correct word for 'centre' i.e 'center'. I cant understand why it is behaving like this. Also attaching the source -: CreateDataStructure creates the Lucene Index and initializeSpellChecker initializes the spell Checker. import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.sql.SQLException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.search.spell.LuceneDictionary; import org.apache.lucene.search.spell.SpellChecker; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; public class SpellCheckImpl implements SpellCheck{ private String SIMPLE_DIRECTORY; private String SPELL_DIRECTORY; private String DATA_TEXT_FILE; private String DEFAULT_FIELD="field"; private SpellCheckImpl(){ } //Configure the directories public SpellCheckImpl(String directoryPath){ File f = new File(directoryPath); if(f.isDirectory()){ this.SIMPLE_DIRECTORY = directoryPath+"/test"; this.SPELL_DIRECTORY = directoryPath+"/sp"; File simple = new File(this.SIMPLE_DIRECTORY); File spell = new File(this.SPELL_DIRECTORY); if(!simple.isDirectory()){ simple.mkdir(); } if(!spell.isDirectory()){ spell.mkdir(); } } } /** * Initialize the Dictionary with given Keywords */ public void initialize(String filePath){ this.DATA_TEXT_FILE = filePath; try{ createDataStructure(SIMPLE_DIRECTORY); initializeSpellChecker(SIMPLE_DIRECTORY,SPELL_DIRECTORY); }catch(Exception e){ System.out.println("Initialization failed "+e.getMessage()); } } /** * This method creates the index for the list of good words at the given location. * @param origDirLocation * @param dictionaryType * @throws IOException * @throws InstantiationException * @throws IllegalAccessException * @throws ClassNotFoundException * @throws SQLException */ private void createDataStructure(String origDirLocation) throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException, SQLException{ Directory directory = FSDirectory.getDirectory(origDirLocation, true); Analyzer analyzer = new StandardAnalyzer(); IndexWriter iwriter = new IndexWriter(directory, analyzer, true); long time=System.currentTimeMillis(); InputStream is = null; is = new FileInputStream(new File(DATA_TEXT_FILE)); Document doc = new Document(); //doc.add(Field.Text(DEFAULT_FIELD, (Reader) new InputStreamReader(is))); doc.add(new Field(DEFAULT_FIELD, "Mayur Vihar Center Circle Udyog Vihar Noida Gurgaon", Field.Store.YES, Field.Index.TOKENIZED)); iwriter.addDocument(doc); iwriter.optimize(); time=System.currentTimeMillis()-time; System.out.println("time to Create Lucene Index "+time); iwriter.close(); } /** * This method creates the spell checker dictionary from the words directory at the specified location. * @param origDirLocation * @param spellDirLocation * @throws IOException */ private void initializeSpellChecker(String origDirLocation, String spellDirLocation) throws IOException{ FSDirectory origDir = FSDirectory.getDirectory(origDirLocation, false); FSDirectory spellDir = FSDirectory.getDirectory(spellDirLocation, true); long time=System.currentTimeMillis(); IndexReader indexReader = null; indexReader = IndexReader.open(origDir); SpellChecker spellChecker = new SpellChecker(spellDir); spellChecker.indexDictionnary(new LuceneDictionary(indexReader, DEFAULT_FIELD)); time=System.currentTimeMillis()-time; System.out.println("time to build Spell Checker Dictionary "+time); } public String[] GetMatches(String badWord) throws ParseException { SpellChecker spellChecker = null; try { spellChecker = new SpellChecker(FSDirectory.getDirectory(SPELL_DIRECTORY,false)); spellChecker.setAccuraty(0); if(spellChecker.exist(badWord)){ System.out.println("here"); } String[] similarWords = spellChecker.suggestSimilar(badWord, 25); return similarWords; } catch (IOException e) { throw new ParseException(e.getMessage()); } } public String GetBestMatch(String badWord) throws ParseException { String[] correctWords = GetMatches(badWord); if(correctWords != null && correctWords.length > 0){ return correctWords[0]; } return "No Correct Spelling Found"; } public void addWords(String word) throws IOException{ long time=System.currentTimeMillis(); Analyzer analyzer = new StandardAnalyzer(); IndexWriter writer = new IndexWriter(SIMPLE_DIRECTORY, analyzer, false); if(word != null && !"".equals(word)){ Document doc = new Document(); doc.add(new Field(DEFAULT_FIELD, word , Field.Store.YES, Field.Index.TOKENIZED)); writer.addDocument(doc); } writer.optimize(); writer.close(); time=System.currentTimeMillis()-time; initializeSpellChecker(SIMPLE_DIRECTORY,SPELL_DIRECTORY); System.out.println("time to add words "+time); } public void addWords(String[] word) throws IOException{ long time=System.currentTimeMillis(); Analyzer analyzer = new StandardAnalyzer(); IndexWriter writer = new IndexWriter(SIMPLE_DIRECTORY, analyzer, false); if(word != null){ for(int i=0;i<word.length;i++){ if(word[i] != null && !"".equals(word[i])){ Document doc = new Document(); doc.add(new Field(DEFAULT_FIELD, word[i] , Field.Store.YES, Field.Index.TOKENIZED)); writer.addDocument(doc); } } } writer.optimize(); writer.close(); time=System.currentTimeMillis()-time; initializeSpellChecker(SIMPLE_DIRECTORY,SPELL_DIRECTORY); System.out.println("time to add words "+time); } } -- This message is automatically generated by JIRA. - You can reply to this email to add a comment to the issue online. --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]