[ http://issues.apache.org/jira/browse/LUCENE-527?page=all ] Erik Hatcher closed LUCENE-527: -------------------------------
Resolution: Invalid > Bug in the TermDocs.freq() method? > ----------------------------------- > > Key: LUCENE-527 > URL: http://issues.apache.org/jira/browse/LUCENE-527 > Project: Lucene - Java > Type: Bug > Versions: 1.9 > Environment: Scientific linux > Reporter: Håkon T. Bommen > > I believe I get incorrect data from the TermDocs.freq() method. The attached > code demonstrates this. Document one has the correct term count. In documents zero > and two, the terms "stored" and "indexed" are reported to occur once in both > documents. This is incorrect. > // LuceneTest.java > import org.apache.lucene.analysis.Analyzer; > import org.apache.lucene.analysis.standard.StandardAnalyzer; > import org.apache.lucene.queryParser.ParseException; > import org.apache.lucene.document.*; > import org.apache.lucene.index.*; > import org.apache.lucene.search.*; > import org.apache.lucene.queryParser.QueryParser; > import org.apache.lucene.store.RAMDirectory; > import org.apache.lucene.store.Directory; > public class LuceneTest{ > public LuceneTest(){} > public static void main(String[] args){ > IndexWriter writer; > IndexReader reader; > Searcher searcher; > Document doc; > Directory dir = new RAMDirectory(); > try{ > // create index > writer = new IndexWriter( dir , new StandardAnalyzer(), > true); > doc = new Document(); > doc.add( new Field( "title", "Doc 0", Field.Store.YES, > Field.Index.TOKENIZED ) ); > doc.add( new Field( "contents", "Text Text and more > Text", Field.Store.NO, Field.Index.TOKENIZED ) ); > writer.addDocument(doc); > doc = new Document(); > doc.add( new Field( "title", "Doc 1", Field.Store.YES, > Field.Index.TOKENIZED ) ); > doc.add( new Field( "contents", "This text is not > stored, only indexed.", Field.Store.NO, Field.Index.TOKENIZED ) ); > writer.addDocument(doc); > doc = new Document(); > doc.add( new Field( "title", "Doc 2", Field.Store.YES, > Field.Index.TOKENIZED ) ); > doc.add( new Field( "contents", "Text Text Text Text", > Field.Store.NO, 
Field.Index.TOKENIZED ) ); > writer.addDocument(doc); > writer.close(); > // search > searcher = new IndexSearcher(dir); > reader = IndexReader.open(dir); > QueryParser qp = new QueryParser("contents", new > StandardAnalyzer()); > Query query = qp.parse("stored and indexed text"); > String[] terms = {"stored", "indexed", "text"}; > Hits queryHits = searcher.search(query); > // print results > System.out.println( "Found " + queryHits.length() + " > hits."); > for(int i=0; i<queryHits.length(); i++){ > doc = queryHits.doc(i); > System.out.println("*** " + doc.get("title") + > " ***"); > int docID = queryHits.id(i); > for (int j=0; j<terms.length; j++){ > TermDocs td = reader.termDocs( new > Term("contents", terms[j]) ); > td.skipTo(docID); > System.out.println( "Term '" + terms[j] > + "' occures " + > td.freq() + " time(s) in > document nr. " + docID ); > } > } > }catch(Exception e){System.out.println("Darn");} > } > } -- This message is automatically generated by JIRA. - If you think it was sent incorrectly contact one of the administrators: http://issues.apache.org/jira/secure/Administrators.jspa - For more information on JIRA, see: http://www.atlassian.com/software/jira --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]