Author: klaas
Date: Wed Feb 21 17:38:47 2007
New Revision: 510338
URL: http://svn.apache.org/viewvc?view=rev&rev=510338
Log:
- moved highlighting-specific classes to HighlightingUtils.java
- brace consistency in HighlightingUtils.java
- removed unused final statics in SolrParams (should highlighting
param defs be moved here?)
Modified:
lucene/solr/trunk/src/java/org/apache/solr/request/SolrParams.java
lucene/solr/trunk/src/java/org/apache/solr/util/HighlightingUtils.java
lucene/solr/trunk/src/java/org/apache/solr/util/SolrPluginUtils.java
Modified: lucene/solr/trunk/src/java/org/apache/solr/request/SolrParams.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/request/SolrParams.java?view=diff&rev=510338&r1=510337&r2=510338
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/request/SolrParams.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/request/SolrParams.java Wed Feb 21 17:38:47 2007
@@ -60,14 +60,6 @@
public static final String DEBUG_QUERY = "debugQuery";
/** another query to explain against */
public static final String EXPLAIN_OTHER = "explainOther";
- /** wether to highlight */
- public static final String HIGHLIGHT = "highlight";
- /** fields to highlight */
- public static final String HIGHLIGHT_FIELDS = "highlightFields";
- /** maximum highlight fragments to return */
- public static final String MAX_SNIPPETS = "maxSnippets";
- /** override default highlight Formatter class */
- public static final String HIGHLIGHT_FORMATTER_CLASS =
"highlightFormatterClass";
/**
* Should facet counts be calculated?
Modified: lucene/solr/trunk/src/java/org/apache/solr/util/HighlightingUtils.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/util/HighlightingUtils.java?view=diff&rev=510338&r1=510337&r2=510338
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/util/HighlightingUtils.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/util/HighlightingUtils.java Wed Feb 21 17:38:47 2007
@@ -18,11 +18,14 @@
import java.io.IOException;
import java.io.StringReader;
-import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
+import java.util.List;
+import java.util.LinkedList;
+import java.util.ArrayList;
+import java.util.ListIterator;
import org.apache.solr.request.*;
import org.apache.solr.search.DocIterator;
@@ -30,7 +33,7 @@
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.schema.SchemaField;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.*;
@@ -38,8 +41,7 @@
/**
* Collection of Utility and Factory methods for Highlighting.
*/
-public class HighlightingUtils
-{
+public class HighlightingUtils {
private static final String SIMPLE = "simple";
private static final String HIGHLIGHT = "hl";
@@ -53,8 +55,7 @@
private static final String FIELD_MATCH = PREFIX
+"requireFieldMatch";
private static SolrParams DEFAULTS = null;
- static
- {
+ static {
Map<String,String> map = new HashMap<String,String>();
map.put(SNIPPETS, "1");
map.put(FRAGSIZE, "100");
@@ -66,8 +67,7 @@
}
/** Combine request parameters with highlighting defaults. */
- private static SolrParams getParams(SolrQueryRequest request)
- {
+ private static SolrParams getParams(SolrQueryRequest request) {
return new DefaultSolrParams(request.getParams(), DEFAULTS);
}
@@ -76,8 +76,7 @@
* @param request The current SolrQueryRequest
* @return <code>true</code> if highlighting enabled,
<code>false</code> if not.
*/
- public static boolean isHighlightingEnabled(SolrQueryRequest
request)
- {
+ public static boolean isHighlightingEnabled(SolrQueryRequest
request) {
return getParams(request).getBool(HIGHLIGHT, false);
}
@@ -87,8 +86,7 @@
* @param fieldName The name of the field
* @param request The current SolrQueryRequest
*/
- public static Highlighter getHighlighter(Query query, String
fieldName, SolrQueryRequest request)
- {
+ public static Highlighter getHighlighter(Query query, String
fieldName, SolrQueryRequest request) {
Highlighter highlighter = new Highlighter(
getFormatter(fieldName, request),
getQueryScorer(query, fieldName, request));
@@ -102,15 +100,12 @@
* @param fieldName The name of the field
* @param request The SolrQueryRequest
*/
- public static QueryScorer getQueryScorer(Query query, String
fieldName, SolrQueryRequest request)
- {
+ public static QueryScorer getQueryScorer(Query query, String
fieldName, SolrQueryRequest request) {
boolean reqFieldMatch = getParams(request).getFieldBool
(fieldName, FIELD_MATCH, false);
- if (reqFieldMatch)
- {
+ if (reqFieldMatch) {
return new QueryScorer(query, request.getSearcher
().getReader(), fieldName);
}
- else
- {
+ else {
return new QueryScorer(query);
}
}
@@ -123,25 +118,20 @@
* @param request The current SolrQueryRequest
* @param defaultFields Programmatic default highlight fields,
used if nothing is specified in the handler config or the request.
*/
- public static String[] getHighlightFields(Query query,
SolrQueryRequest request, String[] defaultFields)
- {
+ public static String[] getHighlightFields(Query query,
SolrQueryRequest request, String[] defaultFields) {
String fields[] = getParams(request).getParams(FIELDS);
// if no fields specified in the request, or the handler,
fall back to programmatic default, or default search field.
- if(emptyArray(fields))
- {
+ if(emptyArray(fields)) {
// use default search field if highlight fieldlist not
specified.
- if (emptyArray(defaultFields))
- {
+ if (emptyArray(defaultFields)) {
fields = new String[]{request.getSchema
().getDefaultSearchFieldName()};
}
- else
- {
+ else {
fields = defaultFields;
}
}
- else if (fields.length == 1)
- {
+ else if (fields.length == 1) {
// if there's a single request/handler value, it may be a
space/comma separated list
fields = SolrPluginUtils.split(fields[0]);
}
@@ -149,8 +139,7 @@
return fields;
}
- private static boolean emptyArray(String[] arr)
- {
+ private static boolean emptyArray(String[] arr) {
return (arr == null || arr.length == 0 || arr[0] == null ||
arr[0].trim().length() == 0);
}
@@ -161,8 +150,7 @@
* @param fieldName The name of the field
* @param request The current SolrQueryRequest
*/
- public static int getMaxSnippets(String fieldName,
SolrQueryRequest request)
- {
+ public static int getMaxSnippets(String fieldName,
SolrQueryRequest request) {
return Integer.parseInt(getParams(request).getFieldParam
(fieldName, SNIPPETS));
}
@@ -175,8 +163,7 @@
* @param request The current SolrQueryRequest
* @return An appropriate Formatter.
*/
- public static Formatter getFormatter(String fieldName,
SolrQueryRequest request)
- {
+ public static Formatter getFormatter(String fieldName,
SolrQueryRequest request) {
SolrParams p = getParams(request);
// SimpleHTMLFormatter is the only supported Formatter at
the moment
@@ -192,8 +179,7 @@
* @param request The current SolrQueryRequest
* @return An appropriate Fragmenter.
*/
- public static Fragmenter getFragmenter(String fieldName,
SolrQueryRequest request)
- {
+ public static Fragmenter getFragmenter(String fieldName,
SolrQueryRequest request) {
int fragsize = Integer.parseInt(getParams
(request).getFieldParam(fieldName, FRAGSIZE));
return (fragsize <= 0) ? new NullFragmenter() : new
GapFragmenter(fragsize);
}
@@ -210,8 +196,7 @@
* @return NamedList containing a NamedList for each document,
which in
* turns contains sets (field, summary) pairs.
*/
- public static NamedList doHighlighting(DocList docs, Query
query, SolrQueryRequest req, String[] defaultFields) throws
IOException
- {
+ public static NamedList doHighlighting(DocList docs, Query
query, SolrQueryRequest req, String[] defaultFields) throws
IOException {
if (!isHighlightingEnabled(req))
return null;
@@ -232,13 +217,11 @@
// Highlight each document
DocIterator iterator = docs.iterator();
- for (int i = 0; i < docs.size(); i++)
- {
+ for (int i = 0; i < docs.size(); i++) {
int docId = iterator.nextDoc();
Document doc = readDocs[i];
NamedList docSummaries = new SimpleOrderedMap();
- for (String fieldName : fieldNames)
- {
+ for (String fieldName : fieldNames) {
fieldName = fieldName.trim();
String[] docTexts = doc.getValues(fieldName);
if (docTexts == null) continue;
@@ -249,24 +232,20 @@
String[] summaries;
TextFragment[] frag;
- if (docTexts.length == 1)
- {
+ if (docTexts.length == 1) {
// single-valued field
TokenStream tstream;
- try
- {
+ try {
// attempt term vectors
tstream = TokenSources.getTokenStream
(searcher.getReader(), docId, fieldName);
}
- catch (IllegalArgumentException e)
- {
+ catch (IllegalArgumentException e) {
// fall back to analyzer
tstream = new TokenOrderingFilter
(searcher.getSchema().getAnalyzer().tokenStream(fieldName, new
StringReader(docTexts[0])), 10);
}
frag = highlighter.getBestTextFragments(tstream,
docTexts[0], false, numFragments);
}
- else
- {
+ else {
// multi-valued field
MultiValueTokenStream tstream;
tstream = new MultiValueTokenStream(fieldName,
docTexts, searcher.getSchema().getAnalyzer(), true);
@@ -274,18 +253,16 @@
}
// convert fragments back into text
// TODO: we can include score and position information
in output as snippet attributes
- if (frag.length > 0)
- {
+ if (frag.length > 0) {
ArrayList<String> fragTexts = new ArrayList<String>();
- for (int j = 0; j < frag.length; j++)
- {
- if ((frag[j] != null) && (frag[j].getScore() > 0))
- {
+ for (int j = 0; j < frag.length; j++) {
+ if ((frag[j] != null) && (frag[j].getScore() >
0)) {
fragTexts.add(frag[j].toString());
}
}
summaries = fragTexts.toArray(new String[0]);
- if (summaries.length > 0) docSummaries.add
(fieldName, summaries);
+ if (summaries.length > 0)
+ docSummaries.add(fieldName, summaries);
}
}
String printId = searcher.getSchema().printableUniqueKey
(doc);
@@ -293,4 +270,161 @@
}
return fragments;
}
+}
+
+/**
+ * Helper class which creates a single TokenStream out of values
from a
+ * multi-valued field.
+ */
+class MultiValueTokenStream extends TokenStream {
+ private String fieldName;
+ private String[] values;
+ private Analyzer analyzer;
+ private int curIndex; // next index into the
values array
+ private int curOffset; // offset into
concatenated string
+ private TokenStream currentStream; // tokenStream currently
being iterated
+ private boolean orderTokenOffsets;
+
+ /** Constructs a TokenStream for consecutively-analyzed field
values
+ *
+ * @param fieldName name of the field
+ * @param values array of field data
+ * @param analyzer analyzer instance
+ */
+ public MultiValueTokenStream(String fieldName, String[] values,
+ Analyzer analyzer, boolean
orderTokenOffsets) {
+ this.fieldName = fieldName;
+ this.values = values;
+ this.analyzer = analyzer;
+ curIndex = -1;
+ curOffset = 0;
+ currentStream = null;
+ this.orderTokenOffsets=orderTokenOffsets;
+ }
+
+ /** Returns the next token in the stream, or null at EOS. */
+ public Token next() throws IOException {
+ int extra = 0;
+ if(currentStream == null) {
+ curIndex++;
+ if(curIndex < values.length) {
+ currentStream = analyzer.tokenStream(fieldName,
+ new StringReader
(values[curIndex]));
+ if (orderTokenOffsets) currentStream = new
TokenOrderingFilter(currentStream,10);
+ // add extra space between multiple values
+ if(curIndex > 0)
+ extra = analyzer.getPositionIncrementGap(fieldName);
+ } else {
+ return null;
+ }
+ }
+ Token nextToken = currentStream.next();
+ if(nextToken == null) {
+ curOffset += values[curIndex].length();
+ currentStream = null;
+ return next();
+ }
+ // create an modified token which is the offset into the
concatenated
+ // string of all values
+ Token offsetToken = new Token(nextToken.termText(),
+ nextToken.startOffset() +
curOffset,
+ nextToken.endOffset() + curOffset);
+ offsetToken.setPositionIncrement(nextToken.getPositionIncrement
() + extra*10);
+ return offsetToken;
+ }
+
+ /**
+ * Returns all values as a single String into which the Tokens
index with
+ * their offsets.
+ */
+ public String asSingleValue() {
+ StringBuilder sb = new StringBuilder();
+ for(String str : values)
+ sb.append(str);
+ return sb.toString();
+ }
+
+}
+
+/**
+ * A simple modification of SimpleFragmenter which additionally
creates new
+ * fragments when an unusually-large position increment is
encountered
+ * (this behaves much better in the presence of multi-valued fields).
+ */
+class GapFragmenter extends SimpleFragmenter {
+ /**
+ * When a gap in term positions is observed that is at least
this big, treat
+ * the gap as a fragment delimiter.
+ */
+ public static final int INCREMENT_THRESHOLD = 50;
+ protected int fragOffsetAccum = 0;
+
+ public GapFragmenter() {
+ }
+
+ public GapFragmenter(int fragsize) {
+ super(fragsize);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.lucene.search.highlight.TextFragmenter#start
(java.lang.String)
+ */
+ public void start(String originalText) {
+ fragOffsetAccum = 0;
+ }
+
+ /* (non-Javadoc)
+ * @see
org.apache.lucene.search.highlight.TextFragmenter#isNewFragment
(org.apache.lucene.analysis.Token)
+ */
+ public boolean isNewFragment(Token token) {
+ boolean isNewFrag =
+ token.endOffset() >= fragOffsetAccum + getFragmentSize() ||
+ token.getPositionIncrement() > INCREMENT_THRESHOLD;
+ if(isNewFrag) {
+ fragOffsetAccum += token.endOffset() - fragOffsetAccum;
+ }
+ return isNewFrag;
+ }
+}
+
+/** Orders Tokens in a window first by their startOffset ascending.
+ * endOffset is currently ignored.
+ * This is meant to work around fickleness in the highlighter
only. It
+ * can mess up token positions and should not be used for indexing
or querying.
+ */
+class TokenOrderingFilter extends TokenFilter {
+ private final int windowSize;
+ private final LinkedList<Token> queue = new LinkedList<Token>();
+ private boolean done=false;
+
+ protected TokenOrderingFilter(TokenStream input, int windowSize) {
+ super(input);
+ this.windowSize = windowSize;
+ }
+
+ public Token next() throws IOException {
+ while (!done && queue.size() < windowSize) {
+ Token newTok = input.next();
+ if (newTok==null) {
+ done=true;
+ break;
+ }
+
+ // reverse iterating for better efficiency since we know the
+ // list is already sorted, and most token start offsets will
be too.
+ ListIterator<Token> iter = queue.listIterator(queue.size());
+ while(iter.hasPrevious()) {
+ if (newTok.startOffset() >= iter.previous().startOffset()) {
+ // insertion will be before what next() would return (what
+ // we just compared against), so move back one so the
insertion
+ // will be after.
+ iter.next();
+ break;
+ }
+ }
+ iter.add(newTok);
+ }
+
+ return queue.isEmpty() ? null : queue.removeFirst();
+ }
}
Modified: lucene/solr/trunk/src/java/org/apache/solr/util/SolrPluginUtils.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/util/SolrPluginUtils.java?view=diff&rev=510338&r1=510337&r2=510338
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/util/SolrPluginUtils.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/util/SolrPluginUtils.java Wed Feb 21 17:38:47 2007
@@ -18,15 +18,11 @@
package org.apache.solr.util;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.highlight.*;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrException;
import org.apache.solr.request.SolrParams;
@@ -849,156 +845,5 @@
}
-/**
- * Helper class which creates a single TokenStream out of values
from a
- * multi-valued field.
- */
-class MultiValueTokenStream extends TokenStream {
- private String fieldName;
- private String[] values;
- private Analyzer analyzer;
- private int curIndex; // next index into the
values array
- private int curOffset; // offset into
concatenated string
- private TokenStream currentStream; // tokenStream currently
being iterated
- private boolean orderTokenOffsets;
-
- /** Constructs a TokenStream for consecutively-analyzed field
values
- *
- * @param fieldName name of the field
- * @param values array of field data
- * @param analyzer analyzer instance
- */
- public MultiValueTokenStream(String fieldName, String[] values,
- Analyzer analyzer, boolean
orderTokenOffsets) {
- this.fieldName = fieldName;
- this.values = values;
- this.analyzer = analyzer;
- curIndex = -1;
- curOffset = 0;
- currentStream = null;
- this.orderTokenOffsets=orderTokenOffsets;
- }
-
- /** Returns the next token in the stream, or null at EOS. */
- public Token next() throws IOException {
- int extra = 0;
- if(currentStream == null) {
- curIndex++;
- if(curIndex < values.length) {
- currentStream = analyzer.tokenStream(fieldName,
- new StringReader
(values[curIndex]));
- if (orderTokenOffsets) currentStream = new
TokenOrderingFilter(currentStream,10);
- // add extra space between multiple values
- if(curIndex > 0)
- extra = analyzer.getPositionIncrementGap(fieldName);
- } else {
- return null;
- }
- }
- Token nextToken = currentStream.next();
- if(nextToken == null) {
- curOffset += values[curIndex].length();
- currentStream = null;
- return next();
- }
- // create an modified token which is the offset into the
concatenated
- // string of all values
- Token offsetToken = new Token(nextToken.termText(),
- nextToken.startOffset() +
curOffset,
- nextToken.endOffset() + curOffset);
- offsetToken.setPositionIncrement(nextToken.getPositionIncrement
() + extra*10);
- return offsetToken;
- }
-
- /**
- * Returns all values as a single String into which the Tokens
index with
- * their offsets.
- */
- public String asSingleValue() {
- StringBuilder sb = new StringBuilder();
- for(String str : values)
- sb.append(str);
- return sb.toString();
- }
-
-}
-
-/**
- * A simple modification of SimpleFragmenter which additionally
creates new
- * fragments when an unusually-large position increment is
encountered
- * (this behaves much better in the presence of multi-valued fields).
- */
-class GapFragmenter extends SimpleFragmenter {
- public static final int INCREMENT_THRESHOLD = 50;
- protected int fragOffsetAccum = 0;
-
- public GapFragmenter() {
- }
-
- public GapFragmenter(int fragsize) {
- super(fragsize);
- }
-
- /* (non-Javadoc)
- * @see org.apache.lucene.search.highlight.TextFragmenter#start
(java.lang.String)
- */
- public void start(String originalText) {
- fragOffsetAccum = 0;
- }
- /* (non-Javadoc)
- * @see
org.apache.lucene.search.highlight.TextFragmenter#isNewFragment
(org.apache.lucene.analysis.Token)
- */
- public boolean isNewFragment(Token token) {
- boolean isNewFrag =
- token.endOffset() >= fragOffsetAccum + getFragmentSize() ||
- token.getPositionIncrement() > INCREMENT_THRESHOLD;
- if(isNewFrag) {
- fragOffsetAccum += token.endOffset() - fragOffsetAccum;
- }
- return isNewFrag;
- }
-}
-
-/** Orders Tokens in a window first by their startOffset ascending.
- * endOffset is currently ignored.
- * This is meant to work around fickleness in the highlighter
only. It
- * can mess up token positions and should not be used for indexing
or querying.
- */
-class TokenOrderingFilter extends TokenFilter {
- private final int windowSize;
- private final LinkedList<Token> queue = new LinkedList<Token>();
- private boolean done=false;
-
- protected TokenOrderingFilter(TokenStream input, int windowSize) {
- super(input);
- this.windowSize = windowSize;
- }
-
- public Token next() throws IOException {
- while (!done && queue.size() < windowSize) {
- Token newTok = input.next();
- if (newTok==null) {
- done=true;
- break;
- }
-
- // reverse iterating for better efficiency since we know the
- // list is already sorted, and most token start offsets will
be too.
- ListIterator<Token> iter = queue.listIterator(queue.size());
- while(iter.hasPrevious()) {
- if (newTok.startOffset() >= iter.previous().startOffset()) {
- // insertion will be before what next() would return (what
- // we just compared against), so move back one so the
insertion
- // will be after.
- iter.next();
- break;
- }
- }
- iter.add(newTok);
- }
-
- return queue.isEmpty() ? null : queue.removeFirst();
- }
-}