[
https://issues.apache.org/jira/browse/LUCENE-1001?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12648259#action_12648259
]
Jonathan Mamou commented on LUCENE-1001:
----------------------------------------
Hi,
Here is the relevant code.
I would expect to obtain
10
pos: 10
pos: 11
while I obtain
10
pos: 0
pos: 11
import java.io.StringReader;
import java.util.Collection;
import java.util.Iterator;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spans.PayloadSpans;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
public class Test {
public static void main (String args[]) throws Exception{
IndexWriter writer = new IndexWriter(args[0], new
TestPayloadAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
Document doc = new Document();
doc.add();new Field("content", new StringReader("a b c d e f g
h i j a k")));
writer.addDocument(doc);
writer.close();
IndexSearcher is = new IndexSearcher(args[0]););
SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"
));
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"
));
SpanQuery[] sqs = {stq1,stq2};
SpanNearQuery snq = new SpanNearQuery(sqs,1,true);
PayloadSpans spans = snq.getPayloadSpans(is.getIndexReader());
TopDocs topDocs = is.search(snq,1);
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
while) (spans.next()) {
System.out.println(spans.start());
Collection<byte[]> payloads = spans.getPayload();
for (Iterator<byte[]> it = payloads.iterator();
it.hasNext();) {
System.out.println(new String(it.next()));
}
}}
}
}
}}
-------------------------------------------------------------------------------------------------------------------------------------
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Payload;
/**
 * Analyzer used by the reproduction: tokenizes with {@link LowerCaseTokenizer}
 * and wraps the resulting stream in a {@link PayloadFilter}.
 */
public class TestPayloadAnalyzer extends Analyzer {

  /**
   * Creates the token stream for a field.
   *
   * @param fieldName name of the field being analyzed; handed to the payload filter
   * @param reader source of the field text
   * @return a lower-casing tokenizer wrapped in a {@link PayloadFilter}
   */
  public TokenStream tokenStream(String fieldName, Reader reader) {
    return new PayloadFilter(new LowerCaseTokenizer(reader), fieldName);
  }
}
class PayloadFilter extends TokenFilter {
String fieldName;
int pos;
public PayloadFilter(TokenStream input, String fieldName) {
super(input);
this.fieldName = fieldName;
pos = 0;
}
public Token next() throws IOException {
Token result = input.next();
if (result != null) {
String token = new String(result.termBuffer(), 0, result.termLength
());
result.setPayload(),new Payload(("pos: " + pos).getBytes()));
pos += result.getPositionIncrement();
}
return} result;
}
}
Jonathan
> Add Payload retrieval to Spans
> ------------------------------
>
> Key: LUCENE-1001
> URL: https://issues.apache.org/jira/browse/LUCENE-1001
> Project: Lucene - Java
> Issue Type: New Feature
> Components: Search
> Reporter: Grant Ingersoll
> Assignee: Grant Ingersoll
> Priority: Minor
> Fix For: 2.4
>
> Attachments: LUCENE-1001.patch, LUCENE-1001.patch, LUCENE-1001.patch,
> LUCENE-1001.patch, LUCENE-1001.patch, LUCENE-1001.patch, LUCENE-1001.patch,
> LUCENE-1001.patch
>
>
> It will be nice to have access to payloads when doing SpanQuerys.
> See http://www.gossamer-threads.com/lists/lucene/java-dev/52270 and
> http://www.gossamer-threads.com/lists/lucene/java-dev/51134
> Current API, added to Spans.java is below. I will try to post a patch as
> soon as I can figure out how to make it work for unordered spans (I believe I
> have all the other cases working).
> {noformat}
> /**
> * Returns the payload data for the current span.
> * This is invalid until {@link #next()} is called for
> * the first time.
> * This method must not be called more than once after each call
> * of {@link #next()}. However, payloads are loaded lazily,
> * so if the payload data for the current position is not needed,
> * this method may not be called at all for performance reasons.<br>
> * <br>
> * <p><font color="#FF0000">
> * WARNING: The status of the <b>Payloads</b> feature is experimental.
> * The APIs introduced here might change in the future and will not be
> * supported anymore in such a case.</font>
> *
> * @return a List of byte arrays containing the data of this payload
> * @throws IOException
> */
> // TODO: Remove warning after API has been finalized
> List/*<byte[]>*/ getPayload() throws IOException;
> /**
> * Checks if a payload can be loaded at this position.
> * <p/>
> * Payloads can only be loaded once per call to
> * {@link #next()}.
> * <p/>
> * <p><font color="#FF0000">
> * WARNING: The status of the <b>Payloads</b> feature is experimental.
> * The APIs introduced here might change in the future and will not be
> * supported anymore in such a case.</font>
> *
> * @return true if there is a payload available at this position that can
> * be loaded
> */
> // TODO: Remove warning after API has been finalized
> public boolean isPayloadAvailable();
> {noformat}
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]