Lucene Spell Index Not giving Proper Result
-------------------------------------------
Key: LUCENE-827
URL: https://issues.apache.org/jira/browse/LUCENE-827
Project: Lucene - Java
Issue Type: Bug
Environment: Windows XP, Linux
Reporter: Gaurav Gupta
I am passing the list of words 'Mayur Vihar Center Circle Udyog Vihar Noida
Gurgaon' to create a spell index from a Lucene index. When I search for the
correct word for 'Centrer', i.e. 'Center', it doesn't find it. I checked
whether the word is present in the spell index, and it is not there.
Building the spell index directly from a plain-text dictionary gives me the
correct word for 'centre', i.e. 'center'. I can't understand why it is
behaving like this.
I am also attaching the source:
createDataStructure creates the Lucene index and initializeSpellChecker
initializes the spell checker.
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.sql.SQLException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class SpellCheckImpl implements SpellCheck{
private String SIMPLE_DIRECTORY;
private String SPELL_DIRECTORY;
private String DATA_TEXT_FILE;
private String DEFAULT_FIELD="field";
private SpellCheckImpl(){
}
//Configure the directories
public SpellCheckImpl(String directoryPath){
File f = new File(directoryPath);
if(f.isDirectory()){
this.SIMPLE_DIRECTORY = directoryPath+"/test";
this.SPELL_DIRECTORY = directoryPath+"/sp";
File simple = new File(this.SIMPLE_DIRECTORY);
File spell = new File(this.SPELL_DIRECTORY);
if(!simple.isDirectory()){
simple.mkdir();
}
if(!spell.isDirectory()){
spell.mkdir();
}
}
}
/**
* Initialize the Dictionary with given Keywords
*/
public void initialize(String filePath){
this.DATA_TEXT_FILE = filePath;
try{
createDataStructure(SIMPLE_DIRECTORY);
initializeSpellChecker(SIMPLE_DIRECTORY,SPELL_DIRECTORY);
}catch(Exception e){
System.out.println("Initialization failed
"+e.getMessage());
}
}
/**
* This method creates the index for the list of good words at the
given location.
* @param origDirLocation
* @param dictionaryType
* @throws IOException
* @throws InstantiationException
* @throws IllegalAccessException
* @throws ClassNotFoundException
* @throws SQLException
*/
private void createDataStructure(String origDirLocation) throws
IOException, InstantiationException,
IllegalAccessException, ClassNotFoundException, SQLException{
Directory directory = FSDirectory.getDirectory(origDirLocation,
true);
Analyzer analyzer = new StandardAnalyzer();
IndexWriter iwriter = new IndexWriter(directory, analyzer,
true);
long time=System.currentTimeMillis();
InputStream is = null;
is = new FileInputStream(new File(DATA_TEXT_FILE));
Document doc = new Document();
//doc.add(Field.Text(DEFAULT_FIELD, (Reader) new
InputStreamReader(is)));
doc.add(new Field(DEFAULT_FIELD, "Mayur Vihar Center Circle
Udyog Vihar Noida Gurgaon", Field.Store.YES, Field.Index.TOKENIZED));
iwriter.addDocument(doc);
iwriter.optimize();
time=System.currentTimeMillis()-time;
System.out.println("time to Create Lucene Index "+time);
iwriter.close();
}
/**
* This method creates the spell checker dictionary from the words
directory at the specified location.
* @param origDirLocation
* @param spellDirLocation
* @throws IOException
*/
private void initializeSpellChecker(String origDirLocation, String
spellDirLocation) throws IOException{
FSDirectory origDir = FSDirectory.getDirectory(origDirLocation,
false);
FSDirectory spellDir =
FSDirectory.getDirectory(spellDirLocation, true);
long time=System.currentTimeMillis();
IndexReader indexReader = null;
indexReader = IndexReader.open(origDir);
SpellChecker spellChecker = new SpellChecker(spellDir);
spellChecker.indexDictionnary(new LuceneDictionary(indexReader,
DEFAULT_FIELD));
time=System.currentTimeMillis()-time;
System.out.println("time to build Spell Checker Dictionary
"+time);
}
public String[] GetMatches(String badWord) throws ParseException
{
SpellChecker spellChecker = null;
try
{
spellChecker = new
SpellChecker(FSDirectory.getDirectory(SPELL_DIRECTORY,false));
spellChecker.setAccuraty(0);
if(spellChecker.exist(badWord)){
System.out.println("here");
}
String[] similarWords =
spellChecker.suggestSimilar(badWord, 25);
return similarWords;
}
catch (IOException e)
{
throw new ParseException(e.getMessage());
}
}
public String GetBestMatch(String badWord) throws ParseException
{
String[] correctWords = GetMatches(badWord);
if(correctWords != null && correctWords.length > 0){
return correctWords[0];
}
return "No Correct Spelling Found";
}
public void addWords(String word) throws IOException{
long time=System.currentTimeMillis();
Analyzer analyzer = new StandardAnalyzer();
IndexWriter writer = new IndexWriter(SIMPLE_DIRECTORY,
analyzer, false);
if(word != null && !"".equals(word)){
Document doc = new Document();
doc.add(new Field(DEFAULT_FIELD, word ,
Field.Store.YES,
Field.Index.TOKENIZED));
writer.addDocument(doc);
}
writer.optimize();
writer.close();
time=System.currentTimeMillis()-time;
initializeSpellChecker(SIMPLE_DIRECTORY,SPELL_DIRECTORY);
System.out.println("time to add words "+time);
}
public void addWords(String[] word) throws IOException{
long time=System.currentTimeMillis();
Analyzer analyzer = new StandardAnalyzer();
IndexWriter writer = new IndexWriter(SIMPLE_DIRECTORY,
analyzer, false);
if(word != null){
for(int i=0;i<word.length;i++){
if(word[i] != null && !"".equals(word[i])){
Document doc = new Document();
doc.add(new Field(DEFAULT_FIELD,
word[i] , Field.Store.YES,
Field.Index.TOKENIZED));
writer.addDocument(doc);
}
}
}
writer.optimize();
writer.close();
time=System.currentTimeMillis()-time;
initializeSpellChecker(SIMPLE_DIRECTORY,SPELL_DIRECTORY);
System.out.println("time to add words "+time);
}
}
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]