I just uploaded a patch for 3X that will work for 3.2. On Jul 21, 2011, at 4:25 PM, Mark Miller wrote:
> Yeah, it's off trunk - I'll submit a 3X patch in a bit - just have to change > that to an IndexReader I believe. > > - Mark > > On Jul 21, 2011, at 4:01 PM, Peter Keegan wrote: > >> Does this patch require the trunk version? I'm using 3.2 and >> 'AtomicReaderContext' isn't there. >> >> Peter >> >> On Thu, Jul 21, 2011 at 3:07 PM, Mark Miller <markrmil...@gmail.com> wrote: >> >>> Hey Peter, >>> >>> Getting sucked back into Spans... >>> >>> That test should pass now - I uploaded a new patch to >>> https://issues.apache.org/jira/browse/LUCENE-777 >>> >>> Further tests may be needed though. >>> >>> - Mark >>> >>> >>> On Jul 21, 2011, at 9:28 AM, Peter Keegan wrote: >>> >>>> Hi Mark, >>>> >>>> Here is a unit test using a version of 'SpanWithinQuery' modified for 3.2 >>>> ('getTerms' removed) . The last test fails (search for "1" and "3"). >>>> >>>> package org.apache.lucene.search.spans; >>>> >>>> import java.io.Reader; >>>> >>>> import org.apache.lucene.analysis.Analyzer; >>>> import org.apache.lucene.analysis.TokenStream; >>>> import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; >>>> import >>>> org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; >>>> import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; >>>> import org.apache.lucene.document.Document; >>>> import org.apache.lucene.document.Field; >>>> import org.apache.lucene.index.IndexReader; >>>> import org.apache.lucene.index.RandomIndexWriter; >>>> import org.apache.lucene.index.Term; >>>> import org.apache.lucene.store.Directory; >>>> import org.apache.lucene.search.IndexSearcher; >>>> import org.apache.lucene.search.PhraseQuery; >>>> import org.apache.lucene.search.ScoreDoc; >>>> import org.apache.lucene.search.TermQuery; >>>> import org.apache.lucene.search.spans.SpanNearQuery; >>>> import org.apache.lucene.search.spans.SpanQuery; >>>> import org.apache.lucene.search.spans.SpanTermQuery; >>>> import org.apache.lucene.util.LuceneTestCase; >>>> >>>> public class 
TestSentence extends LuceneTestCase { >>>> public static final String field = "field"; >>>> public static final String START = "^"; >>>> public static final String END = "$"; >>>> public void testSetPosition() throws Exception { >>>> Analyzer analyzer = new Analyzer() { >>>> @Override >>>> public TokenStream tokenStream(String fieldName, Reader reader) { >>>> return new TokenStream() { >>>> private final String[] TOKENS = {"1", "2", "3", END, "4", "5", "6", END, >>>> "9"}; >>>> private final int[] INCREMENTS = {1,1,1,0,1,1,1,0,1}; >>>> private int i = 0; >>>> >>>> PositionIncrementAttribute posIncrAtt = >>>> addAttribute(PositionIncrementAttribute.class); >>>> CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); >>>> OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); >>>> >>>> @Override >>>> public boolean incrementToken() { >>>> assertEquals(TOKENS.length, INCREMENTS.length); >>>> if (i == TOKENS.length) >>>> return false; >>>> clearAttributes(); >>>> termAtt.append(TOKENS[i]); >>>> offsetAtt.setOffset(i,i); >>>> posIncrAtt.setPositionIncrement(INCREMENTS[i]); >>>> i++; >>>> return true; >>>> } >>>> }; >>>> } >>>> }; >>>> Directory store = newDirectory(); >>>> RandomIndexWriter writer = new RandomIndexWriter(random, store, >>> analyzer); >>>> Document d = new Document(); >>>> d.add(newField("field", "bogus", Field.Store.YES, Field.Index.ANALYZED)); >>>> writer.addDocument(d); >>>> IndexReader reader = writer.getReader(); >>>> writer.close(); >>>> IndexSearcher searcher = newSearcher(reader); >>>> >>>> SpanTermQuery startSentence = makeSpanTermQuery(START); >>>> SpanTermQuery endSentence = makeSpanTermQuery(END); >>>> SpanQuery[] clauses = new SpanQuery[2]; >>>> clauses[0] = makeSpanTermQuery("1"); >>>> clauses[1] = makeSpanTermQuery("2"); >>>> SpanNearQuery allKeywords = new SpanNearQuery(clauses, Integer.MAX_VALUE, >>>> false); // SpanAndQuery equivalent >>>> SpanWithinQuery query = new SpanWithinQuery(allKeywords, endSentence, 0); 
>>>> System.out.println("query: "+query); >>>> ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; >>>> assertEquals(hits.length, 1); >>>> >>>> clauses[1] = makeSpanTermQuery("4"); >>>> allKeywords = new SpanNearQuery(clauses, Integer.MAX_VALUE, false); // >>>> SpanAndQuery equivalent >>>> query = new SpanWithinQuery(allKeywords, endSentence, 0); >>>> System.out.println("query: "+query); >>>> hits = searcher.search(query, null, 1000).scoreDocs; >>>> assertEquals(hits.length, 0); >>>> >>>> PhraseQuery pq = new PhraseQuery(); >>>> pq.add(new Term(field, "3")); >>>> pq.add(new Term(field, "4")); >>>> hits = searcher.search(pq, null, 1000).scoreDocs; >>>> assertEquals(hits.length, 1); >>>> >>>> clauses[1] = makeSpanTermQuery("3"); >>>> allKeywords = new SpanNearQuery(clauses, Integer.MAX_VALUE, false); // >>>> SpanAndQuery equivalent >>>> query = new SpanWithinQuery(allKeywords, endSentence, 0); >>>> System.out.println("query: "+query); >>>> hits = searcher.search(query, null, 1000).scoreDocs; >>>> assertEquals(hits.length, 1); >>>> >>>> >>>> } >>>> >>>> public SpanTermQuery makeSpanTermQuery(String text) { >>>> return new SpanTermQuery(new Term(field, text)); >>>> } >>>> public TermQuery makeTermQuery(String text) { >>>> return new TermQuery(new Term(field, text)); >>>> } >>>> } >>>> >>>> Peter >>>> >>>> On Wed, Jul 20, 2011 at 9:22 PM, Mark Miller <markrmil...@gmail.com> >>> wrote: >>>> >>>>> >>>>> On Jul 20, 2011, at 7:44 PM, Mark Miller wrote: >>>>> >>>>>> >>>>>> On Jul 20, 2011, at 11:27 AM, Peter Keegan wrote: >>>>>> >>>>>>> Mark Miller's 'SpanWithinQuery' patch >>>>>>> seems to have the same issue. >>>>>> >>>>>> If I remember right (It's been more than a couple years), I did index >>> the >>>>> sentence markers at the same position as the last word in the sentence. >>> And >>>>> I think the limitation that I ate was that the word could belong to both >>>>> its true sentence, and the one after it. 
>>>>>> >>>>>> - Mark Miller >>>>>> lucidimagination.com >>>>> >>>>> Perhaps you could index the sentence marker at both the last word of the >>>>> sentence as well as the first word of the next sentence if there is one. >>>>> This would seem to solve the above limitation as well? >>>>> >>>>> - Mark Miller >>>>> lucidimagination.com >>>>> >>>>> >>>>> >>>>> >>>>> >>>>> >>>>> >>>>> >>>>> >>>>> --------------------------------------------------------------------- >>>>> To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org >>>>> For additional commands, e-mail: java-user-h...@lucene.apache.org >>>>> >>>>> >>> >>> - Mark Miller >>> lucidimagination.com >>> >>> >>> >>> >>> >>> >>> >>> >>> >>> --------------------------------------------------------------------- >>> To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org >>> For additional commands, e-mail: java-user-h...@lucene.apache.org >>> >>> > > - Mark Miller > lucidimagination.com > > > > > > > > - Mark Miller lucidimagination.com --------------------------------------------------------------------- To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org For additional commands, e-mail: java-user-h...@lucene.apache.org