Hi, you forgot to close (or commit) the IndexWriter before opening the reader.
Uwe ----- Uwe Schindler H.-H.-Meier-Allee 63, D-28213 Bremen http://www.thetaphi.de eMail: u...@thetaphi.de > -----Original Message----- > From: Trejkaz [mailto:trej...@trypticon.org] > Sent: Tuesday, August 19, 2014 6:50 AM > To: Lucene Users Mailing List > Subject: Can some terms from analysis be silently dropped when indexing? > Because I'm pretty sure I'm seeing that happen. > > Unrelated to my previous mail to the list, but related to the same > investigation... > > The following test program just indexes a phrase of nonsense words using JapaneseAnalyzer > and then queries for one of the words using the same analyser. > > The same analyser is being used both for indexing and for querying, yet in > the latter case, no search results come back. > > I added in debugging to print out the TermPositions for all terms and also the > parsed query - lo and behold, two of the terms have disappeared on the way > into the index. > > Where do the other two terms go? > > TX > > (PS doesn't this also mean that it's impossible to use the same analyser for > both indexing and query? At least in some cases.) 
> > > import org.apache.lucene.analysis.Analyzer; > import org.apache.lucene.analysis.ja.JapaneseAnalyzer; > import org.apache.lucene.document.Document; > import org.apache.lucene.document.Field; > import org.apache.lucene.index.IndexReader; > import org.apache.lucene.index.IndexWriter; > import org.apache.lucene.index.IndexWriterConfig; > import org.apache.lucene.index.Term; > import org.apache.lucene.index.TermEnum; > import org.apache.lucene.index.TermPositions; > import org.apache.lucene.queryParser.standard.StandardQueryParser; > import > org.apache.lucene.queryParser.standard.config.StandardQueryConfigHandle > r; > import org.apache.lucene.search.IndexSearcher; > import org.apache.lucene.search.Query; > import org.apache.lucene.search.TopDocs; > import org.apache.lucene.store.Directory; > import org.apache.lucene.store.RAMDirectory; > import org.apache.lucene.util.Version; > import org.junit.Test; > > import static org.hamcrest.Matchers.*; > import static org.junit.Assert.*; > > public class TestJapaneseAnalysis { > @Test > public void testJapaneseAnalysis() throws Exception { > try (Directory directory = new RAMDirectory()) { > Analyzer analyser = new JapaneseAnalyzer(Version.LUCENE_36); > > try (IndexWriter writer = new IndexWriter(directory, new > IndexWriterConfig(Version.LUCENE_36, analyser))) { > Document document = new Document(); > document.add(new Field("content", "blah blah commercial > blah > blah \u79CB\u8449\u539F blah blah", Field.Store.NO, > Field.Index.ANALYZED)); > writer.addDocument(document); > } > > try (IndexReader reader = IndexReader.open(directory); > TermEnum terms = reader.terms(new Term("content", "")); > TermPositions termPositions = reader.termPositions()) { > do { > Term term = terms.term(); > if (term.field() != "content") { > break; > } > > System.out.println(term); > termPositions.seek(terms); > > while (termPositions.next()) { > System.out.println(" " + termPositions.doc()); > int freq = termPositions.freq(); > for (int i = 
0; i < freq; i++) { > System.out.println(" " + > termPositions.nextPosition()); > } > } > } > while (terms.next()); > > StandardQueryParser queryParser = new > StandardQueryParser(analyser); > > queryParser.setDefaultOperator(StandardQueryConfigHandler.Operator.AN > D); > // quoted to work around strange behaviour of > StandardQueryParser treating this as a boolean query. > Query query = > queryParser.parse("\"\u79CB\u8449\u539F\"", "content"); > System.out.println(query); > > TopDocs topDocs = new IndexSearcher(reader).search(query, > 10); > assertThat(topDocs.totalHits, is(1)); > } > } > } > } > > /* > Output: > > content:blah > 0 > 0 > 1 > 3 > 4 > 6 > 7 > content:commercial > 0 > 2 > content:秋葉原 > 0 > 5 > content:"(秋葉 秋葉原) 原" > */ > > --------------------------------------------------------------------- > To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org > For additional commands, e-mail: java-user-h...@lucene.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org For additional commands, e-mail: java-user-h...@lucene.apache.org