Hi,
I have a problem using TermQuery and FuzzyQuery with terms that contain the
character "-". I have indexed "jack" and "jack-bauer" as two tokenized
captions, but I get no results when searching for "jack-bauer". Moreover, a
TermQuery for "jack" returns both captions.
What should I do to find "jack-bauer" with new TermQuery("jack-bauer")?
A full test case is given below.
Thanks,
Tom
import junit.framework.Assert;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
/**
 * Reproducer for a term-lookup mismatch: two captions ("jack" and "jack-bauer") are indexed as
 * tokenized fields through a {@link SimpleAnalyzer} and then searched with raw
 * {@link TermQuery} instances.
 */
public class IDebugIndexTest {

    /** Location of the throw-away index; it is re-created on every run. */
    private static final String INDEX_PATH = "/tmp/idx_test";

    /**
     * Indexes the two captions and checks how many documents each {@link TermQuery} matches.
     *
     * @throws Exception if the index cannot be written or read; the exception is propagated so
     *         that JUnit reports the real cause instead of an anonymous failure
     */
    @Test
    public void TermQueryTest() throws Exception {
        Analyzer analyser = new SimpleAnalyzer();

        // Write both documents to a fresh index (create == true overwrites any previous one).
        IndexWriter writer = new IndexWriter(FSDirectory.getDirectory(INDEX_PATH), analyser, true);
        try {
            Document jack = new Document();
            jack.add(new Field("caption", "jack", Field.Store.YES, Field.Index.TOKENIZED));
            writer.addDocument(jack);

            Document jackBauer = new Document();
            jackBauer.add(new Field("caption", "jack-bauer", Field.Store.YES,
                    Field.Index.TOKENIZED));
            writer.addDocument(jackBauer);
        } finally {
            // Always release the writer, even when adding a document fails.
            writer.close();
        }

        // Search the index that was just written. The reader is kept in its own variable
        // because a searcher built from an existing reader does not close it.
        IndexReader reader = IndexReader.open(FSDirectory.getDirectory(INDEX_PATH));
        try {
            IndexSearcher s = new IndexSearcher(reader);
            try {
                // "jack" is a term of both documents, so both captions match.
                Hits jackHits = s.search(new TermQuery(new Term("caption", "jack")));
                Assert.assertEquals("documents matching term 'jack'", 2, jackHits.length());

                // This assertion fails: SimpleAnalyzer splits text at non-letter characters,
                // so "jack-bauer" is indexed as the two terms "jack" and "bauer". A TermQuery
                // is not analyzed and looks for the literal term "jack-bauer", which is not in
                // the index. Matching the whole caption would need a field that is indexed
                // without tokenization (or an analyzer that keeps the hyphen).
                Hits jackBauerHits = s.search(new TermQuery(new Term("caption", "jack-bauer")));
                Assert.assertEquals("documents matching term 'jack-bauer'", 1,
                        jackBauerHits.length());
            } finally {
                s.close();
            }
        } finally {
            reader.close();
        }
    }
}