Lucene Spell Index Not giving Proper Result
-------------------------------------------

                 Key: LUCENE-827
                 URL: https://issues.apache.org/jira/browse/LUCENE-827
             Project: Lucene - Java
          Issue Type: Bug
         Environment: Windows XP, Linux
            Reporter: Gaurav Gupta


I am passing the list of words 'Mayur Vihar Center Circle Udyog Vihar Noida 
Gurgaon' to create a spell index from a Lucene index. When I search for the 
correct word for 'Centrer', i.e. 'Center', it doesn't find it. I checked whether 
the word is present in the spell index, and it is not there.

Building the spell index directly from a plain-text dictionary gives me the 
correct word for 'centre', i.e. 'center'. I can't understand why the spell index 
built from the Lucene index behaves differently.



I am also attaching the source:

createDataStructure creates the Lucene index and initializeSpellChecker 
initializes the spell checker.



import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.sql.SQLException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SpellCheckImpl implements SpellCheck{

        
        private String SIMPLE_DIRECTORY;
        private String SPELL_DIRECTORY;
        private String DATA_TEXT_FILE;
        private String DEFAULT_FIELD="field";
        
        
        
        private SpellCheckImpl(){
                
        }
        
        //Configure the directories
        
        public SpellCheckImpl(String directoryPath){
                
                File f = new File(directoryPath);
                
                if(f.isDirectory()){
                        
                        this.SIMPLE_DIRECTORY = directoryPath+"/test";
                        this.SPELL_DIRECTORY = directoryPath+"/sp";
                        
                        File simple = new File(this.SIMPLE_DIRECTORY);
                        File spell = new File(this.SPELL_DIRECTORY);
                        
                        if(!simple.isDirectory()){
                                simple.mkdir();
                        }
                        
                        if(!spell.isDirectory()){
                                spell.mkdir();
                        }
                        
                }
                
        }
        
        
        /**
         * Initialize the Dictionary with given Keywords
         */
        public void initialize(String filePath){
                
                this.DATA_TEXT_FILE = filePath;
                
                try{
                        
                        createDataStructure(SIMPLE_DIRECTORY);
                        
initializeSpellChecker(SIMPLE_DIRECTORY,SPELL_DIRECTORY);
                        
                }catch(Exception e){
                        System.out.println("Initialization failed 
"+e.getMessage());
                }
        }
        
        
        /**
         * This method creates the index for the list of good words at the 
given location.
         * @param origDirLocation
         * @param dictionaryType
         * @throws IOException
         * @throws InstantiationException
         * @throws IllegalAccessException
         * @throws ClassNotFoundException
         * @throws SQLException
         */
        private void createDataStructure(String origDirLocation) throws 
IOException, InstantiationException, 
        IllegalAccessException, ClassNotFoundException, SQLException{

                Directory directory = FSDirectory.getDirectory(origDirLocation, 
true);
                Analyzer analyzer = new StandardAnalyzer();
                IndexWriter iwriter = new IndexWriter(directory, analyzer, 
true);
                
                long time=System.currentTimeMillis();
                
                InputStream is = null;
                
                is = new FileInputStream(new File(DATA_TEXT_FILE));
                
                Document doc = new Document();
                
                //doc.add(Field.Text(DEFAULT_FIELD, (Reader) new 
InputStreamReader(is)));
                doc.add(new Field(DEFAULT_FIELD, "Mayur Vihar Center Circle 
Udyog Vihar Noida Gurgaon", Field.Store.YES, Field.Index.TOKENIZED));
                
                
                iwriter.addDocument(doc);
                iwriter.optimize();
                time=System.currentTimeMillis()-time;
                System.out.println("time to Create Lucene Index "+time);
                
                iwriter.close();

        }
        
        
        
        /**
         * This method creates the spell checker dictionary from the words 
directory at the specified location.
         * @param origDirLocation
         * @param spellDirLocation
         * @throws IOException
         */
        private void initializeSpellChecker(String origDirLocation, String 
spellDirLocation) throws IOException{
                FSDirectory origDir = FSDirectory.getDirectory(origDirLocation, 
false);
                FSDirectory spellDir = 
FSDirectory.getDirectory(spellDirLocation, true);
                
                long time=System.currentTimeMillis();
                
                IndexReader indexReader = null;
            indexReader = IndexReader.open(origDir);
            
            SpellChecker  spellChecker = new SpellChecker(spellDir);

            spellChecker.indexDictionnary(new LuceneDictionary(indexReader, 
DEFAULT_FIELD));
            
            time=System.currentTimeMillis()-time;
            
                System.out.println("time to build Spell Checker Dictionary 
"+time);
                
        }
        
        
        public String[] GetMatches(String badWord) throws ParseException 
        {
                SpellChecker spellChecker = null;
                try
                {
                        spellChecker = new 
SpellChecker(FSDirectory.getDirectory(SPELL_DIRECTORY,false));
                        
                        spellChecker.setAccuraty(0);
                        
                        if(spellChecker.exist(badWord)){
                                System.out.println("here");
                        }
                        
                        String[] similarWords = 
spellChecker.suggestSimilar(badWord, 25);
        
                        return similarWords;
                } 
                catch (IOException e) 
                {
                        throw new ParseException(e.getMessage());
                }
        }
        
        
        public String GetBestMatch(String badWord) throws ParseException
        {       
                
                String[] correctWords = GetMatches(badWord);
                
                if(correctWords != null && correctWords.length > 0){
                        return correctWords[0];
                }
                
                return "No Correct Spelling Found";
                
        }
        
        public void addWords(String word) throws IOException{
                
                
                long time=System.currentTimeMillis();
                Analyzer analyzer = new StandardAnalyzer();
                IndexWriter writer   = new IndexWriter(SIMPLE_DIRECTORY, 
analyzer, false);
                
                if(word != null && !"".equals(word)){
                        Document doc = new Document();
                                doc.add(new Field(DEFAULT_FIELD, word , 
Field.Store.YES,
                                                Field.Index.TOKENIZED));
                        writer.addDocument(doc);
                }
                
        writer.optimize();
        writer.close();
        time=System.currentTimeMillis()-time;
        initializeSpellChecker(SIMPLE_DIRECTORY,SPELL_DIRECTORY);
                System.out.println("time to add words "+time);
        }
        
        
                public void addWords(String[]   word) throws IOException{
                
                long time=System.currentTimeMillis();
                Analyzer analyzer = new StandardAnalyzer();
                IndexWriter writer   = new IndexWriter(SIMPLE_DIRECTORY, 
analyzer, false);
                
                if(word != null){
                        for(int i=0;i<word.length;i++){
                                if(word[i] != null && !"".equals(word[i])){
                                Document doc = new Document();
                                        doc.add(new Field(DEFAULT_FIELD, 
word[i] , Field.Store.YES,
                                                        Field.Index.TOKENIZED));
                                        writer.addDocument(doc);
                                }       
                        }
                }
                
        writer.optimize();
        writer.close();
        time=System.currentTimeMillis()-time;
        initializeSpellChecker(SIMPLE_DIRECTORY,SPELL_DIRECTORY);
                System.out.println("time to add words "+time);
        }
        
}


-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to