Yeah, it's off trunk - I'll submit a 3.x patch in a bit - I just have to change that to an IndexReader, I believe.
- Mark On Jul 21, 2011, at 4:01 PM, Peter Keegan wrote: > Does this patch require the trunk version? I'm using 3.2 and > 'AtomicReaderContext' isn't there. > > Peter > > On Thu, Jul 21, 2011 at 3:07 PM, Mark Miller <markrmil...@gmail.com> wrote: > >> Hey Peter, >> >> Getting sucked back into Spans... >> >> That test should pass now - I uploaded a new patch to >> https://issues.apache.org/jira/browse/LUCENE-777 >> >> Further tests may be needed though. >> >> - Mark >> >> >> On Jul 21, 2011, at 9:28 AM, Peter Keegan wrote: >> >>> Hi Mark, >>> >>> Here is a unit test using a version of 'SpanWithinQuery' modified for 3.2 >>> ('getTerms' removed) . The last test fails (search for "1" and "3"). >>> >>> package org.apache.lucene.search.spans; >>> >>> import java.io.Reader; >>> >>> import org.apache.lucene.analysis.Analyzer; >>> import org.apache.lucene.analysis.TokenStream; >>> import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; >>> import >>> org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; >>> import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; >>> import org.apache.lucene.document.Document; >>> import org.apache.lucene.document.Field; >>> import org.apache.lucene.index.IndexReader; >>> import org.apache.lucene.index.RandomIndexWriter; >>> import org.apache.lucene.index.Term; >>> import org.apache.lucene.store.Directory; >>> import org.apache.lucene.search.IndexSearcher; >>> import org.apache.lucene.search.PhraseQuery; >>> import org.apache.lucene.search.ScoreDoc; >>> import org.apache.lucene.search.TermQuery; >>> import org.apache.lucene.search.spans.SpanNearQuery; >>> import org.apache.lucene.search.spans.SpanQuery; >>> import org.apache.lucene.search.spans.SpanTermQuery; >>> import org.apache.lucene.util.LuceneTestCase; >>> >>> public class TestSentence extends LuceneTestCase { >>> public static final String field = "field"; >>> public static final String START = "^"; >>> public static final String END = "$"; >>> 
public void testSetPosition() throws Exception { >>> Analyzer analyzer = new Analyzer() { >>> @Override >>> public TokenStream tokenStream(String fieldName, Reader reader) { >>> return new TokenStream() { >>> private final String[] TOKENS = {"1", "2", "3", END, "4", "5", "6", END, >>> "9"}; >>> private final int[] INCREMENTS = {1,1,1,0,1,1,1,0,1}; >>> private int i = 0; >>> >>> PositionIncrementAttribute posIncrAtt = >>> addAttribute(PositionIncrementAttribute.class); >>> CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); >>> OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); >>> >>> @Override >>> public boolean incrementToken() { >>> assertEquals(TOKENS.length, INCREMENTS.length); >>> if (i == TOKENS.length) >>> return false; >>> clearAttributes(); >>> termAtt.append(TOKENS[i]); >>> offsetAtt.setOffset(i,i); >>> posIncrAtt.setPositionIncrement(INCREMENTS[i]); >>> i++; >>> return true; >>> } >>> }; >>> } >>> }; >>> Directory store = newDirectory(); >>> RandomIndexWriter writer = new RandomIndexWriter(random, store, >> analyzer); >>> Document d = new Document(); >>> d.add(newField("field", "bogus", Field.Store.YES, Field.Index.ANALYZED)); >>> writer.addDocument(d); >>> IndexReader reader = writer.getReader(); >>> writer.close(); >>> IndexSearcher searcher = newSearcher(reader); >>> >>> SpanTermQuery startSentence = makeSpanTermQuery(START); >>> SpanTermQuery endSentence = makeSpanTermQuery(END); >>> SpanQuery[] clauses = new SpanQuery[2]; >>> clauses[0] = makeSpanTermQuery("1"); >>> clauses[1] = makeSpanTermQuery("2"); >>> SpanNearQuery allKeywords = new SpanNearQuery(clauses, Integer.MAX_VALUE, >>> false); // SpanAndQuery equivalent >>> SpanWithinQuery query = new SpanWithinQuery(allKeywords, endSentence, 0); >>> System.out.println("query: "+query); >>> ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; >>> assertEquals(hits.length, 1); >>> >>> clauses[1] = makeSpanTermQuery("4"); >>> allKeywords = new 
SpanNearQuery(clauses, Integer.MAX_VALUE, false); // >>> SpanAndQuery equivalent >>> query = new SpanWithinQuery(allKeywords, endSentence, 0); >>> System.out.println("query: "+query); >>> hits = searcher.search(query, null, 1000).scoreDocs; >>> assertEquals(hits.length, 0); >>> >>> PhraseQuery pq = new PhraseQuery(); >>> pq.add(new Term(field, "3")); >>> pq.add(new Term(field, "4")); >>> hits = searcher.search(pq, null, 1000).scoreDocs; >>> assertEquals(hits.length, 1); >>> >>> clauses[1] = makeSpanTermQuery("3"); >>> allKeywords = new SpanNearQuery(clauses, Integer.MAX_VALUE, false); // >>> SpanAndQuery equivalent >>> query = new SpanWithinQuery(allKeywords, endSentence, 0); >>> System.out.println("query: "+query); >>> hits = searcher.search(query, null, 1000).scoreDocs; >>> assertEquals(hits.length, 1); >>> >>> >>> } >>> >>> public SpanTermQuery makeSpanTermQuery(String text) { >>> return new SpanTermQuery(new Term(field, text)); >>> } >>> public TermQuery makeTermQuery(String text) { >>> return new TermQuery(new Term(field, text)); >>> } >>> } >>> >>> Peter >>> >>> On Wed, Jul 20, 2011 at 9:22 PM, Mark Miller <markrmil...@gmail.com> >> wrote: >>> >>>> >>>> On Jul 20, 2011, at 7:44 PM, Mark Miller wrote: >>>> >>>>> >>>>> On Jul 20, 2011, at 11:27 AM, Peter Keegan wrote: >>>>> >>>>>> Mark Miller's 'SpanWithinQuery' patch >>>>>> seems to have the same issue. >>>>> >>>>> If I remember right (It's been more than a couple years), I did index >> the >>>> sentence markers at the same position as the last word in the sentence. >> And >>>> I think the limitation that I ate was that the word could belong to both >>>> its true sentence, and the one after it. >>>>> >>>>> - Mark Miller >>>>> lucidimagination.com >>>> >>>> Perhaps you could index the sentence marker at both the last word of the >>>> sentence as well as the first word of the next sentence if there is one. >>>> This would seem to solve the above limitation as well? 
>>>> >>>> - Mark Miller >>>> lucidimagination.com >>>> >>>> >>>> >>>> >>>> >>>> >>>> >>>> >>>> >>>> --------------------------------------------------------------------- >>>> To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org >>>> For additional commands, e-mail: java-user-h...@lucene.apache.org >>>> >>>> >> >> - Mark Miller >> lucidimagination.com >> >> >> >> >> >> >> >> >> >> --------------------------------------------------------------------- >> To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org >> For additional commands, e-mail: java-user-h...@lucene.apache.org >> >> - Mark Miller lucidimagination.com --------------------------------------------------------------------- To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org For additional commands, e-mail: java-user-h...@lucene.apache.org