Re: highlighting search terms

Erik Hatcher Tue, 09 May 2006 08:12:13 -0700


On May 8, 2006, at 4:07 PM, Chris Hostetter wrote:

: I'd like to integrate the Lucene highlighting classes into Solr. Can
: anybody offer suggestions on where to start? I saw this on the Solr
: task list, so there must be someone on this list who has at least
: thought about this problem.


It'd been discussed, and I think Erik had an initial implementation he
wasn't particularly happy with (but it sounded decent to me).

Right, I implemented this (patch pasted below) but with a hardcoded field name and by retrieving the document again. The highlighted text appears in a separate section from the actual hits, but aligned with them sequentially. A more standard and configurable solution, and of course one that is more efficient in getting the field data (from the field cache I presume) would be better, but so far it's working well enough for a client demo.


        Erik



package org.apache.solr.request;
import org.apache.lucene.search.*;
+import org.apache.lucene.search.highlight.*;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
import java.util.List;
import java.util.Set;
import java.util.HashSet;
+import java.util.ArrayList;
import java.util.logging.Level;
import java.util.regex.Pattern;
import java.io.IOException;
+import java.io.StringReader;
import java.net.URL;
import org.apache.solr.util.StrUtils;
@@ -101,7 +109,8 @@
         }
       }

- DocList results = req.getSearcher().getDocList(query, null,sort, req.getStart(), req.getLimit(), flags);

+      SolrIndexSearcher searcher = req.getSearcher();

+ DocList results = searcher.getDocList(query, null, sort,req.getStart(), req.getLimit(), flags);

       rsp.add(null,results);
       if (debug!=null) {
@@ -123,6 +132,20 @@
         rsp.add("debug",dbg);
       }
+      boolean toHighlight = true;
+      if (toHighlight) {
+        DocIterator iterator = results.iterator();
+        ArrayList highlightedFields = new ArrayList();
+        while (iterator.hasNext()) {
+          int id = iterator.nextDoc();
+          Document doc = searcher.doc(id);

+ highlightedFields.add(highlight(doc.getField("body"),query));

+        }
+
+        rsp.add("highlightedBody", highlightedFields);
+      }
+
+
     } catch (SolrException e) {
       rsp.setException(e);
       numErrors++;
@@ -135,6 +158,22 @@
     }
   }

+  private Object highlight(Field field, Query query) throws IOException {

+    QueryScorer scorer = new QueryScorer(query);
+    SimpleHTMLFormatter formatter =
+        new SimpleHTMLFormatter("<span class=\"highlight\">",
+            "</span>");
+    Highlighter highlighter = new Highlighter(formatter, scorer);
+    Fragmenter fragmenter = new SimpleFragmenter(50);
+    highlighter.setTextFragmenter(fragmenter);
+
+    String value = field.stringValue();
+    TokenStream tokenStream = new StandardAnalyzer()
+        .tokenStream(field.name(), new StringReader(value));
+
+    return highlighter.getBestFragments(tokenStream, value, 5, "...");
+  }
+

private NamedList getExplainList(Query query, DocList results,SolrIndexSearcher searcher, IndexSchema schema) throws IOException {

     NamedList explainList = new NamedList();
     DocIterator iterator = results.iterator();

Re: highlighting search terms

Reply via email to