[
https://issues.apache.org/jira/browse/LUCENE-5396?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15458572#comment-15458572
]
Tim Allison commented on LUCENE-5396:
-------------------------------------
Same behavior still in trunk. Happy to submit a failing test that works on trunk
if anyone wants it.
> SpanNearQuery returns single term spans
> ---------------------------------------
>
> Key: LUCENE-5396
> URL: https://issues.apache.org/jira/browse/LUCENE-5396
> Project: Lucene - Core
> Issue Type: Bug
> Components: core/search
> Reporter: Piotr Pęzik
>
> Let's assume we have an index with two documents:
> 1. contents: "test bunga bunga test"
> 2. contents: "test bunga test"
> We run two SpanNearQueries against this index:
> 1. spanNear([contents:bunga, contents:bunga], 0, true)
> 2. spanNear([contents:bunga, contents:bunga], 0, false)
> For the first query we get 1 hit. The first document in the example above
> gets matched and the second one doesn't. This makes sense, because we want the
> term "bunga" followed by another "bunga" here.
> However, both documents get matched by the second query. This is also
> problematic in cases where we have duplicate terms in longer (unordered)
> SpanNear queries, e.g., unordered 'A B A' will match spans such as 'A B' or
> 'B A'.
> A complete example follows.
> ---------
> import org.apache.lucene.analysis.Analyzer;
> import org.apache.lucene.analysis.standard.StandardAnalyzer;
> import org.apache.lucene.document.Document;
> import org.apache.lucene.document.TextField;
> import org.apache.lucene.index.DirectoryReader;
> import org.apache.lucene.index.IndexWriter;
> import org.apache.lucene.index.IndexWriterConfig;
> import org.apache.lucene.index.Term;
> import org.apache.lucene.search.IndexSearcher;
> import org.apache.lucene.search.TopDocs;
> import org.apache.lucene.search.spans.SpanNearQuery;
> import org.apache.lucene.search.spans.SpanQuery;
> import org.apache.lucene.search.spans.SpanTermQuery;
> import org.apache.lucene.store.Directory;
> import org.apache.lucene.store.FSDirectory;
> import org.apache.lucene.store.RAMDirectory;
> import org.apache.lucene.util.Version;
> import java.io.StringReader;
> import static org.junit.Assert.assertEquals;
> class SpansBug {
> public static void main(String [] args) throws Exception {
> Directory dir = new RAMDirectory();
> Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
> IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_45,
> analyzer);
> IndexWriter writer = new IndexWriter(dir, iwc);
> String contents = "contents";
> Document doc1 = new Document();
> doc1.add(new TextField(contents, new StringReader("test bunga bunga
> test")));
> Document doc2 = new Document();
> doc2.add(new TextField(contents, new StringReader("test bunga
> test")));
> writer.addDocument(doc1);
> writer.addDocument(doc2);
> writer.commit();
> IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
> SpanQuery stq1 = new SpanTermQuery(new Term(contents,"bunga"));
> SpanQuery stq2 = new SpanTermQuery(new Term(contents,"bunga"));
> SpanQuery [] spqa = new SpanQuery[]{stq1,stq2};
> SpanNearQuery spanQ1 = new SpanNearQuery(spqa,0, true);
> SpanNearQuery spanQ2 = new SpanNearQuery(spqa,0, false);
> System.out.println(spanQ1);
> TopDocs tdocs1 = searcher.search(spanQ1,10);
> assertEquals(tdocs1.totalHits ,1);
> System.out.println(spanQ2);
> TopDocs tdocs2 = searcher.search(spanQ2,10);
> //I'd expect one hit here:
> assertEquals(tdocs2.totalHits ,1); // Assertion fails
> }
> }
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]