On May 8, 2006, at 4:07 PM, Chris Hostetter wrote:
: I'd like to integrate the Lucene highlighting classes into Solr. Can
: anybody offer suggestions on where to start? I saw this on the Solr
: task list, so there must be someone on this list who has at least
: thought about this problem.

It's been discussed, and I think Erik had an initial implementation he
wasn't particularly happy with (but it sounded decent to me).


Right, I implemented this (patch pasted below) but with a hardcoded field name and by retrieving the document again. The highlighted text appears in a separate section from the actual hits, but aligned with them sequentially. A more standard and configurable solution, and of course one that is more efficient in getting the field data (from the field cache I presume) would be better, but so far it's working well enough for a client demo.

        Erik



package org.apache.solr.request;
import org.apache.lucene.search.*;
+import org.apache.lucene.search.highlight.*;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
import java.util.List;
import java.util.Set;
import java.util.HashSet;
+import java.util.ArrayList;
import java.util.logging.Level;
import java.util.regex.Pattern;
import java.io.IOException;
+import java.io.StringReader;
import java.net.URL;
import org.apache.solr.util.StrUtils;
@@ -101,7 +109,8 @@
         }
       }
- DocList results = req.getSearcher().getDocList(query, null, sort, req.getStart(), req.getLimit(), flags);
+      SolrIndexSearcher searcher = req.getSearcher();
+ DocList results = searcher.getDocList(query, null, sort, req.getStart(), req.getLimit(), flags);
       rsp.add(null,results);
       if (debug!=null) {
@@ -123,6 +132,20 @@
         rsp.add("debug",dbg);
       }
+      boolean toHighlight = true;
+      if (toHighlight) {
+        DocIterator iterator = results.iterator();
+        ArrayList highlightedFields = new ArrayList();
+        while (iterator.hasNext()) {
+          int id = iterator.nextDoc();
+          Document doc = searcher.doc(id);
+ highlightedFields.add(highlight(doc.getField("body"), query));
+        }
+
+        rsp.add("highlightedBody", highlightedFields);
+      }
+
+
     } catch (SolrException e) {
       rsp.setException(e);
       numErrors++;
@@ -135,6 +158,22 @@
     }
   }
+ private Object highlight(Field field, Query query) throws IOException {
+    QueryScorer scorer = new QueryScorer(query);
+    SimpleHTMLFormatter formatter =
+        new SimpleHTMLFormatter("<span class=\"highlight\">",
+            "</span>");
+    Highlighter highlighter = new Highlighter(formatter, scorer);
+    Fragmenter fragmenter = new SimpleFragmenter(50);
+    highlighter.setTextFragmenter(fragmenter);
+
+    String value = field.stringValue();
+    TokenStream tokenStream = new StandardAnalyzer()
+        .tokenStream(field.name(), new StringReader(value));
+
+    return highlighter.getBestFragments(tokenStream, value, 5, "...");
+  }
+
private NamedList getExplainList(Query query, DocList results, SolrIndexSearcher searcher, IndexSchema schema) throws IOException {
     NamedList explainList = new NamedList();
     DocIterator iterator = results.iterator();

Reply via email to